diff --git a/.gitignore b/.gitignore index 6a183cd6b2403505a3bd2e2a0ce959a357443325..224bd2f3a9cf305cc4205f30d7742928de5f8b99 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,9 @@ #OS X specific files. .DS_store +# Nested build directory +/build + #==============================================================================# # Explicit files to ignore (only matches one). #==============================================================================# @@ -62,8 +65,9 @@ tools/polly tools/avrlit # Sphinx build tree, if building in-source dir. docs/_build -# VSCode config files. +# VS2017 and VSCode config files. .vscode +.vs #==============================================================================# # Files created in tree by the Go bindings. diff --git a/CMakeLists.txt b/CMakeLists.txt index c2c9fe0a68b25ca4c42f69c57947ea81949bce9a..b51bc421fdbf60a5d6dbb2ed23e3ebe322aac420 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,17 +56,20 @@ endif() # This should only apply if you are both on an Apple host, and targeting Apple. 
if(CMAKE_HOST_APPLE AND APPLE) - if(NOT CMAKE_XCRUN) - find_program(CMAKE_XCRUN NAMES xcrun) - endif() - if(CMAKE_XCRUN) - execute_process(COMMAND ${CMAKE_XCRUN} -find libtool - OUTPUT_VARIABLE CMAKE_LIBTOOL - OUTPUT_STRIP_TRAILING_WHITESPACE) - endif() + # If CMAKE_LIBTOOL is not set, try to find it with xcrun, then fall back to find_program + if(NOT CMAKE_LIBTOOL) + if(NOT CMAKE_XCRUN) + find_program(CMAKE_XCRUN NAMES xcrun) + endif() + if(CMAKE_XCRUN) + execute_process(COMMAND ${CMAKE_XCRUN} -find libtool + OUTPUT_VARIABLE CMAKE_LIBTOOL + OUTPUT_STRIP_TRAILING_WHITESPACE) + endif() - if(NOT CMAKE_LIBTOOL OR NOT EXISTS CMAKE_LIBTOOL) - find_program(CMAKE_LIBTOOL NAMES libtool) + if(NOT CMAKE_LIBTOOL OR NOT EXISTS "${CMAKE_LIBTOOL}") + find_program(CMAKE_LIBTOOL NAMES libtool) + endif() endif() get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES) @@ -132,18 +135,6 @@ foreach(proj ${LLVM_ENABLE_PROJECTS}) endif() endforeach() -# The following only works with the Ninja generator in CMake >= 3.0. -set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING - "Define the maximum number of concurrent compilation jobs.") -if(LLVM_PARALLEL_COMPILE_JOBS) - if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja") - message(WARNING "Job pooling is only available with Ninja generators.") - else() - set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${LLVM_PARALLEL_COMPILE_JOBS}) - set(CMAKE_JOB_POOL_COMPILE compile_job_pool) - endif() -endif() - # Build llvm with ccache if the package is present set(LLVM_CCACHE_BUILD OFF CACHE BOOL "Set to ON for a ccache enabled build") if(LLVM_CCACHE_BUILD) @@ -183,16 +174,7 @@ if(LLVM_BUILD_GLOBAL_ISEL) add_definitions(-DLLVM_BUILD_GLOBAL_ISEL) endif() -set(LLVM_PARALLEL_LINK_JOBS "" CACHE STRING - "Define the maximum number of concurrent link jobs.") -if(LLVM_PARALLEL_LINK_JOBS) - if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja") - message(WARNING "Job pooling is only available with Ninja generators.") - else() - set_property(GLOBAL APPEND PROPERTY JOB_POOLS 
link_job_pool=${LLVM_PARALLEL_LINK_JOBS}) - set(CMAKE_JOB_POOL_LINK link_job_pool) - endif() -endif() +option(LLVM_ENABLE_DAGISEL_COV "Debug: Prints tablegen patterns that were used for selecting" OFF) # Add path for custom modules set(CMAKE_MODULE_PATH @@ -385,8 +367,6 @@ set(LLVM_TARGETS_TO_BUILD ${LLVM_EXPERIMENTAL_TARGETS_TO_BUILD}) list(REMOVE_DUPLICATES LLVM_TARGETS_TO_BUILD) -include(AddLLVMDefinitions) - option(LLVM_ENABLE_PIC "Build Position-Independent Code" ON) option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." ON) option(LLVM_ENABLE_MODULES "Compile with C++ modules enabled." OFF) @@ -414,9 +394,6 @@ option(LLVM_ENABLE_EXPENSIVE_CHECKS "Enable expensive checks" OFF) set(LLVM_ABI_BREAKING_CHECKS "WITH_ASSERTS" CACHE STRING "Enable abi-breaking checks. Can be WITH_ASSERTS, FORCE_ON or FORCE_OFF.") -option(LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING - "Disable abi-breaking checks mismatch detection at link-tim." OFF) - option(LLVM_FORCE_USE_OLD_HOST_TOOLCHAIN "Set to ON to force using an old, unsupported host toolchain." OFF) @@ -506,6 +483,10 @@ option(LLVM_INCLUDE_UTILS "Generate build targets for the LLVM utils." ON) option(LLVM_BUILD_UTILS "Build LLVM utility binaries. If OFF, just generate build targets." ON) +option(LLVM_INCLUDE_RUNTIMES "Generate build targets for the LLVM runtimes." ON) +option(LLVM_BUILD_RUNTIMES + "Build the LLVM runtimes. If OFF, just generate build targets." ON) + option(LLVM_BUILD_RUNTIME "Build the LLVM runtime libraries." 
ON) option(LLVM_BUILD_EXAMPLES @@ -641,7 +622,7 @@ endif (LLVM_USE_OPROFILE) message(STATUS "Constructing LLVMBuild project information") execute_process( - COMMAND ${PYTHON_EXECUTABLE} ${LLVMBUILDTOOL} + COMMAND ${PYTHON_EXECUTABLE} -B ${LLVMBUILDTOOL} --native-target "${LLVM_NATIVE_ARCH}" --enable-targets "${LLVM_TARGETS_TO_BUILD}" --enable-optional-components "${LLVMOPTIONALCOMPONENTS}" @@ -737,6 +718,30 @@ configure_file( ${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/DataTypes.h.cmake ${LLVM_INCLUDE_DIR}/llvm/Support/DataTypes.h) +# Add target for generating source rpm package. +set(LLVM_SRPM_USER_BINARY_SPECFILE ${CMAKE_CURRENT_SOURCE_DIR}/llvm.spec.in + CACHE FILEPATH ".spec file to use for srpm generation") +set(LLVM_SRPM_BINARY_SPECFILE ${CMAKE_CURRENT_BINARY_DIR}/llvm.spec) +set(LLVM_SRPM_DIR "${CMAKE_CURRENT_BINARY_DIR}/srpm") + +# SVN_REVISION and GIT_COMMIT get set by the call to add_version_info_from_vcs. +# DUMMY_VAR contains a version string which we don't care about. +add_version_info_from_vcs(DUMMY_VAR) +if ( SVN_REVISION ) + set(LLVM_RPM_SPEC_REVISION "r${SVN_REVISION}") +elseif ( GIT_COMMIT ) + set (LLVM_RPM_SPEC_REVISION "g${GIT_COMMIT}") +endif() + +configure_file( + ${LLVM_SRPM_USER_BINARY_SPECFILE} + ${LLVM_SRPM_BINARY_SPECFILE} @ONLY) + +add_custom_target(srpm + COMMAND cpack -G TGZ --config CPackSourceConfig.cmake -B ${LLVM_SRPM_DIR}/SOURCES + COMMAND rpmbuild -bs --define '_topdir ${LLVM_SRPM_DIR}' ${LLVM_SRPM_BINARY_SPECFILE}) + + # They are not referenced. See set_output_directory(). 
set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/bin ) set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} ) @@ -861,7 +866,9 @@ if( LLVM_INCLUDE_TOOLS ) add_subdirectory(tools) endif() -add_subdirectory(runtimes) +if( LLVM_INCLUDE_RUNTIMES ) + add_subdirectory(runtimes) +endif() if( LLVM_INCLUDE_EXAMPLES ) add_subdirectory(examples) diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT index 3625ebf099f1bc433c077cc2db958c51f25727a3..ec4561d991693c7ebffc07fa693f793d98739104 100644 --- a/CODE_OWNERS.TXT +++ b/CODE_OWNERS.TXT @@ -5,12 +5,9 @@ what goes in or not. The list is sorted by surname and formatted to allow easy grepping and beautification by scripts. The fields are: name (N), email (E), web-address -(W), PGP key ID and fingerprint (P), description (D), and snail-mail address -(S). Each entry should contain at least the (N), (E) and (D) fields. - -N: Joe Abbey -E: jabbey@arxan.com -D: LLVM Bitcode (lib/Bitcode/* include/llvm/Bitcode/*) +(W), PGP key ID and fingerprint (P), description (D), snail-mail address +(S) and (I) IRC handle. Each entry should contain at least the (N), (E) and +(D) fields. 
N: Justin Bogner E: mail@justinbogner.com @@ -21,6 +18,11 @@ N: Alex Bradbury E: asb@lowrisc.org D: RISC-V backend (lib/Target/RISCV/*) +N: Matthias Braun +E: matze@braunis.de +I: MatzeB +D: Instruction Scheduling + N: Chandler Carruth E: chandlerc@gmail.com E: chandlerc@google.com @@ -34,6 +36,10 @@ N: Eric Christopher E: echristo@gmail.com D: Debug Information, inline assembly +N: Andrey Churbanov +E: andrey.churbanov@intel.com +D: OpenMP runtime library + N: Greg Clayton E: gclayton@apple.com D: LLDB @@ -48,7 +54,7 @@ D: libc++ N: Peter Collingbourne E: peter@pcc.me.uk -D: llgo, libLTO (lib/LTO/* tools/lto/*) +D: llgo, libLTO (lib/LTO/* tools/lto/*), LLVM Bitcode (lib/Bitcode/* include/llvm/Bitcode/*) N: Quentin Colombet E: qcolombet@apple.com @@ -96,7 +102,7 @@ D: MCJIT, RuntimeDyld and JIT event listeners, Orcish Warchief N: Teresa Johnson E: tejohnson@google.com -D: Gold plugin (tools/gold/*) +D: Gold plugin (tools/gold/*) and IR Linker N: Galina Kistanova E: gkistanova@gmail.com @@ -132,7 +138,7 @@ E: david.majnemer@gmail.com D: IR Constant Folder, InstCombine N: Dylan McKay -E: dylanmckay34@gmail.com +E: me@dylanmckay.io D: AVR Backend N: Tim Northover @@ -180,9 +186,8 @@ E: alexei.starovoitov@gmail.com D: BPF backend N: Tom Stellard -E: thomas.stellard@amd.com -E: mesa-dev@lists.freedesktop.org -D: Release manager for the 3.5 and 3.6 branches, R600 Backend, libclc +E: tstellar@redhat.com +D: Stable release management (x.y.[1-9] releases), AMDGPU Backend, libclc N: Evgeniy Stepanov E: eugenis@google.com @@ -192,18 +197,10 @@ N: Craig Topper E: craig.topper@gmail.com D: X86 Backend -N: Andrew Trick -E: atrick@apple.com -D: Instruction Scheduling - N: Ulrich Weigand E: uweigand@de.ibm.com D: SystemZ Backend -N: Teresa Johnson -E: tejohnson@google.com -D: IR Linker - N: Hans Wennborg E: hans@chromium.org D: Release management (x.y.0 releases) @@ -211,7 +208,3 @@ D: Release management (x.y.0 releases) N: whitequark E: whitequark@whitequark.org D: OCaml bindings 
- -N: Andrey Churbanov -E: andrey.churbanov@intel.com -D: OpenMP runtime library diff --git a/CREDITS.TXT b/CREDITS.TXT index c354900a6e92d1a6c0ad7bafacb99db5bf77b9cd..15d822a680911f07fe48f33f9e733099e8edc396 100644 --- a/CREDITS.TXT +++ b/CREDITS.TXT @@ -457,6 +457,10 @@ N: Adam Treat E: manyoso@yahoo.com D: C++ bugs filed, and C++ front-end bug fixes. +N: Andrew Trick +E: atrick@apple.com +D: Instruction Scheduling, ... + N: Lauro Ramos Venancio E: lauro.venancio@indt.org.br D: ARM backend improvements diff --git a/bindings/go/llvm/DIBuilderBindings.cpp b/bindings/go/llvm/DIBuilderBindings.cpp index 42aa819c7961e91b0b28247d6b2a2b47a19db6ed..53e223d67b4e8e9636c7a0be8533a7ccdf73c154 100644 --- a/bindings/go/llvm/DIBuilderBindings.cpp +++ b/bindings/go/llvm/DIBuilderBindings.cpp @@ -119,7 +119,8 @@ LLVMMetadataRef LLVMDIBuilderCreatePointerType(LLVMDIBuilderRef Dref, const char *Name) { DIBuilder *D = unwrap(Dref); return wrap(D->createPointerType(unwrap(PointeeType), SizeInBits, - AlignInBits, Name)); + AlignInBits, /* DWARFAddressSpace */ None, + Name)); } LLVMMetadataRef diff --git a/bindings/go/llvm/IRBindings.cpp b/bindings/go/llvm/IRBindings.cpp index 20cc05043f28620c5826a09d90dc430aea33e41c..4bfa1bbaf0cc8ac81972947abbc97350e2ec3267 100644 --- a/bindings/go/llvm/IRBindings.cpp +++ b/bindings/go/llvm/IRBindings.cpp @@ -14,6 +14,7 @@ #include "IRBindings.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" @@ -71,6 +72,18 @@ void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Bref, unsigned Line, InlinedAt ? 
unwrap(InlinedAt) : nullptr)); } +LLVMDebugLocMetadata LLVMGetCurrentDebugLocation2(LLVMBuilderRef Bref) { + const auto& Loc = unwrap(Bref)->getCurrentDebugLocation(); + const auto* InlinedAt = Loc.getInlinedAt(); + const LLVMDebugLocMetadata md{ + Loc.getLine(), + Loc.getCol(), + wrap(Loc.getScope()), + InlinedAt == nullptr ? nullptr : wrap(InlinedAt->getRawInlinedAt()), + }; + return md; +} + void LLVMSetSubprogram(LLVMValueRef Func, LLVMMetadataRef SP) { unwrap(Func)->setSubprogram(unwrap(SP)); } diff --git a/bindings/go/llvm/IRBindings.h b/bindings/go/llvm/IRBindings.h index 21147712ed5b4a026f4a5282193b625070249131..f4f490391d4f1107558ee889af0628a93cb39b66 100644 --- a/bindings/go/llvm/IRBindings.h +++ b/bindings/go/llvm/IRBindings.h @@ -27,6 +27,12 @@ extern "C" { #endif typedef struct LLVMOpaqueMetadata *LLVMMetadataRef; +struct LLVMDebugLocMetadata{ + unsigned Line; + unsigned Col; + LLVMMetadataRef Scope; + LLVMMetadataRef InlinedAt; +}; LLVMMetadataRef LLVMConstantAsMetadata(LLVMValueRef Val); @@ -46,6 +52,8 @@ void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Bref, unsigned Line, unsigned Col, LLVMMetadataRef Scope, LLVMMetadataRef InlinedAt); +struct LLVMDebugLocMetadata LLVMGetCurrentDebugLocation2(LLVMBuilderRef Bref); + void LLVMSetSubprogram(LLVMValueRef Fn, LLVMMetadataRef SP); #ifdef __cplusplus diff --git a/bindings/go/llvm/ir.go b/bindings/go/llvm/ir.go index b263c07c512d8e69953f53a7042625d86e8230f4..fe191beb38132ef501e74178d9acf682a218cd90 100644 --- a/bindings/go/llvm/ir.go +++ b/bindings/go/llvm/ir.go @@ -1226,9 +1226,23 @@ func (b Builder) InsertWithName(instr Value, name string) { func (b Builder) Dispose() { C.LLVMDisposeBuilder(b.C) } // Metadata +type DebugLoc struct { + Line, Col uint + Scope Metadata + InlinedAt Metadata +} func (b Builder) SetCurrentDebugLocation(line, col uint, scope, inlinedAt Metadata) { C.LLVMSetCurrentDebugLocation2(b.C, C.unsigned(line), C.unsigned(col), scope.C, inlinedAt.C) } +// Get current debug location. 
Please do not call this function until setting debug location with SetCurrentDebugLocation() +func (b Builder) GetCurrentDebugLocation() (loc DebugLoc) { + md := C.LLVMGetCurrentDebugLocation2(b.C) + loc.Line = uint(md.Line) + loc.Col = uint(md.Col) + loc.Scope = Metadata{C: md.Scope} + loc.InlinedAt = Metadata{C: md.InlinedAt} + return +} func (b Builder) SetInstDebugLocation(v Value) { C.LLVMSetInstDebugLocation(b.C, v.C) } func (b Builder) InsertDeclare(module Module, storage Value, md Value) Value { f := module.NamedFunction("llvm.dbg.declare") diff --git a/bindings/go/llvm/ir_test.go b/bindings/go/llvm/ir_test.go index 13e113957b4d029b3672f822dd77b9f50eb83b3f..c823615a4293c8a2e2997b9fdbaf0a444e0081a3 100644 --- a/bindings/go/llvm/ir_test.go +++ b/bindings/go/llvm/ir_test.go @@ -95,3 +95,42 @@ func TestAttributes(t *testing.T) { testAttribute(t, name) } } + +func TestDebugLoc(t *testing.T) { + mod := NewModule("") + defer mod.Dispose() + + ctx := mod.Context() + + b := ctx.NewBuilder() + defer b.Dispose() + + d := NewDIBuilder(mod) + defer func() { + d.Destroy() + }() + file := d.CreateFile("dummy_file", "dummy_dir") + voidInfo := d.CreateBasicType(DIBasicType{Name: "void"}) + typeInfo := d.CreateSubroutineType(DISubroutineType{file, []Metadata{voidInfo}}) + scope := d.CreateFunction(file, DIFunction{ + Name: "foo", + LinkageName: "foo", + Line: 10, + ScopeLine: 10, + Type: typeInfo, + File: file, + IsDefinition: true, + }) + + b.SetCurrentDebugLocation(10, 20, scope, Metadata{}) + loc := b.GetCurrentDebugLocation() + if loc.Line != 10 { + t.Errorf("Got line %d, though wanted 10", loc.Line) + } + if loc.Col != 20 { + t.Errorf("Got column %d, though wanted 20", loc.Col) + } + if loc.Scope.C != scope.C { + t.Errorf("Got metadata %v as scope, though wanted %v", loc.Scope.C, scope.C) + } +} diff --git a/bindings/go/llvm/transforms_pmbuilder.go b/bindings/go/llvm/transforms_pmbuilder.go index 
3d79d6e2f3273939b8ac7577538fa1073b023957..b164e58812b1553cf4995f7085b32ec0d44977b4 100644 --- a/bindings/go/llvm/transforms_pmbuilder.go +++ b/bindings/go/llvm/transforms_pmbuilder.go @@ -43,6 +43,26 @@ func (pmb PassManagerBuilder) PopulateFunc(pm PassManager) { C.LLVMPassManagerBuilderPopulateFunctionPassManager(pmb.C, pm.C) } +func (pmb PassManagerBuilder) PopulateLTOPassManager(pm PassManager, internalize bool, runInliner bool) { + C.LLVMPassManagerBuilderPopulateLTOPassManager(pmb.C, pm.C, boolToLLVMBool(internalize), boolToLLVMBool(runInliner)) +} + func (pmb PassManagerBuilder) Dispose() { C.LLVMPassManagerBuilderDispose(pmb.C) } + +func (pmb PassManagerBuilder) SetDisableUnitAtATime(val bool) { + C.LLVMPassManagerBuilderSetDisableUnitAtATime(pmb.C, boolToLLVMBool(val)) +} + +func (pmb PassManagerBuilder) SetDisableUnrollLoops(val bool) { + C.LLVMPassManagerBuilderSetDisableUnrollLoops(pmb.C, boolToLLVMBool(val)) +} + +func (pmb PassManagerBuilder) SetDisableSimplifyLibCalls(val bool) { + C.LLVMPassManagerBuilderSetDisableSimplifyLibCalls(pmb.C, boolToLLVMBool(val)) +} + +func (pmb PassManagerBuilder) UseInlinerWithThreshold(threshold uint) { + C.LLVMPassManagerBuilderUseInlinerWithThreshold(pmb.C, C.uint(threshold)) +} diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index a96a722f34d6bbbcd65eaf5a5982a76dd23bb65f..0331d0fa10abf161af11ffdfbf401681febcf6d8 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -46,7 +46,6 @@ endfunction() check_include_file(dirent.h HAVE_DIRENT_H) check_include_file(dlfcn.h HAVE_DLFCN_H) check_include_file(errno.h HAVE_ERRNO_H) -check_include_file(execinfo.h HAVE_EXECINFO_H) check_include_file(fcntl.h HAVE_FCNTL_H) check_include_file(inttypes.h HAVE_INTTYPES_H) check_include_file(link.h HAVE_LINK_H) @@ -88,6 +87,15 @@ if(APPLE) HAVE_CRASHREPORTER_INFO) endif() +if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + check_include_file(linux/magic.h HAVE_LINUX_MAGIC_H) + if(NOT HAVE_LINUX_MAGIC_H) + # older kernels 
use split files + check_include_file(linux/nfs_fs.h HAVE_LINUX_NFS_FS_H) + check_include_file(linux/smb.h HAVE_LINUX_SMB_H) + endif() +endif() + # library checks if( NOT PURE_WINDOWS ) check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD) @@ -115,7 +123,7 @@ if(HAVE_LIBPTHREAD) set(CMAKE_THREAD_PREFER_PTHREAD TRUE) set(THREADS_HAVE_PTHREAD_ARG Off) find_package(Threads REQUIRED) - set(PTHREAD_LIB ${CMAKE_THREAD_LIBS_INIT}) + set(LLVM_PTHREAD_LIB ${CMAKE_THREAD_LIBS_INIT}) endif() # Don't look for these libraries on Windows. Also don't look for them if we're @@ -156,7 +164,9 @@ endif() # function checks check_symbol_exists(arc4random "stdlib.h" HAVE_DECL_ARC4RANDOM) -check_symbol_exists(backtrace "execinfo.h" HAVE_BACKTRACE) +find_package(Backtrace) +set(HAVE_BACKTRACE ${Backtrace_FOUND}) +set(BACKTRACE_HEADER ${Backtrace_HEADER}) check_symbol_exists(_Unwind_Backtrace "unwind.h" HAVE__UNWIND_BACKTRACE) check_symbol_exists(getpagesize unistd.h HAVE_GETPAGESIZE) check_symbol_exists(sysconf unistd.h HAVE_SYSCONF) @@ -227,6 +237,7 @@ if( HAVE_DLFCN_H ) list(APPEND CMAKE_REQUIRED_LIBRARIES dl) endif() check_symbol_exists(dlopen dlfcn.h HAVE_DLOPEN) + check_symbol_exists(dladdr dlfcn.h HAVE_DLADDR) if( HAVE_LIBDL ) list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES dl) endif() @@ -234,7 +245,15 @@ endif() check_symbol_exists(__GLIBC__ stdio.h LLVM_USING_GLIBC) if( LLVM_USING_GLIBC ) - add_llvm_definitions( -D_GNU_SOURCE ) + add_definitions( -D_GNU_SOURCE ) +endif() +# This check requires _GNU_SOURCE +if(HAVE_LIBPTHREAD) + check_library_exists(pthread pthread_getname_np "" HAVE_PTHREAD_GETNAME_NP) + check_library_exists(pthread pthread_setname_np "" HAVE_PTHREAD_SETNAME_NP) +elseif(PTHREAD_IN_LIBC) + check_library_exists(c pthread_getname_np "" HAVE_PTHREAD_GETNAME_NP) + check_library_exists(c pthread_setname_np "" HAVE_PTHREAD_SETNAME_NP) endif() set(headers "sys/types.h") @@ -545,6 +564,9 @@ set(LLVM_BINUTILS_INCDIR "" CACHE PATH "PATH to binutils/include containing 
plugin-api.h for gold plugin.") if(CMAKE_HOST_APPLE AND APPLE) + if(NOT CMAKE_XCRUN) + find_program(CMAKE_XCRUN NAMES xcrun) + endif() if(CMAKE_XCRUN) execute_process(COMMAND ${CMAKE_XCRUN} -find ld OUTPUT_VARIABLE LD64_EXECUTABLE diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index b3c7746c480ade18bbd1f688ca5f323711fc00aa..7f7608cff33d3e0c683272656fc4bd9d5fa6592f 100644 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -718,11 +718,11 @@ macro(add_llvm_executable name) if(NOT ARG_IGNORE_EXTERNALIZE_DEBUGINFO) llvm_externalize_debuginfo(${name}) endif() - if (PTHREAD_LIB) + if (LLVM_PTHREAD_LIB) # libpthreads overrides some standard library symbols, so main # executable must be linked with it in order to provide consistent # API for all shared libaries loaded by this executable. - target_link_libraries(${name} ${PTHREAD_LIB}) + target_link_libraries(${name} ${LLVM_PTHREAD_LIB}) endif() endmacro(add_llvm_executable name) @@ -1027,7 +1027,7 @@ function(add_unittest test_suite test_name) # libpthreads overrides some standard library symbols, so main # executable must be linked with it in order to provide consistent # API for all shared libaries loaded by this executable. 
- target_link_libraries(${test_name} gtest_main gtest ${PTHREAD_LIB}) + target_link_libraries(${test_name} gtest_main gtest ${LLVM_PTHREAD_LIB}) add_dependencies(${test_suite} ${test_name}) get_target_property(test_suite_folder ${test_suite} FOLDER) @@ -1387,7 +1387,11 @@ function(llvm_externalize_debuginfo name) endif() if(NOT LLVM_EXTERNALIZE_DEBUGINFO_SKIP_STRIP) - set(strip_command COMMAND xcrun strip -Sxl $<TARGET_FILE:${name}>) + if(APPLE) + set(strip_command COMMAND xcrun strip -Sxl $<TARGET_FILE:${name}>) + else() + set(strip_command COMMAND strip -gx $<TARGET_FILE:${name}>) + endif() endif() if(APPLE) @@ -1403,7 +1407,11 @@ function(llvm_externalize_debuginfo name) ${strip_command} ) else() - message(FATAL_ERROR "LLVM_EXTERNALIZE_DEBUGINFO isn't implemented for non-darwin platforms!") + add_custom_command(TARGET ${name} POST_BUILD + COMMAND objcopy --only-keep-debug $<TARGET_FILE:${name}> $<TARGET_FILE:${name}>.debug + ${strip_command} -R .gnu_debuglink + COMMAND objcopy --add-gnu-debuglink=$<TARGET_FILE:${name}>.debug $<TARGET_FILE:${name}> + ) endif() endfunction() diff --git a/cmake/modules/AddSphinxTarget.cmake b/cmake/modules/AddSphinxTarget.cmake index 3456b536e80acc80523a2b733dc86ea9babe1d7b..cfc7f38e9e7776e81f79993b713584de935fbb1a 100644 --- a/cmake/modules/AddSphinxTarget.cmake +++ b/cmake/modules/AddSphinxTarget.cmake @@ -48,10 +48,15 @@ function (add_sphinx_target builder project) # Handle installation if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) if (builder STREQUAL man) + if (CMAKE_INSTALL_MANDIR) + set(INSTALL_MANDIR ${CMAKE_INSTALL_MANDIR}/) + else() + set(INSTALL_MANDIR share/man/) + endif() # FIXME: We might not ship all the tools that these man pages describe install(DIRECTORY "${SPHINX_BUILD_DIR}/" # Slash indicates contents of COMPONENT "${project}-sphinx-man" - DESTINATION share/man/man1) + DESTINATION ${INSTALL_MANDIR}man1) elseif (builder STREQUAL html) string(TOUPPER "${project}" project_upper) diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index dee9bdc6e3ecddfbd9ea6536c1e46a4f83a4bdee..dd44476bc996426a7b5a7832e6eade5f75ec81bb 100644 --- 
a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -8,12 +8,41 @@ string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) include(CheckCompilerVersion) include(HandleLLVMStdlib) -include(AddLLVMDefinitions) include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +if(CMAKE_LINKER MATCHES "lld-link.exe" OR (WIN32 AND LLVM_USE_LINKER STREQUAL "lld")) + set(LINKER_IS_LLD_LINK TRUE) +else() + set(LINKER_IS_LLD_LINK FALSE) +endif() + +# Ninja Job Pool support +# The following only works with the Ninja generator in CMake >= 3.0. +set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING + "Define the maximum number of concurrent compilation jobs.") +if(LLVM_PARALLEL_COMPILE_JOBS) + if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja") + message(WARNING "Job pooling is only available with Ninja generators.") + else() + set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${LLVM_PARALLEL_COMPILE_JOBS}) + set(CMAKE_JOB_POOL_COMPILE compile_job_pool) + endif() +endif() -if (CMAKE_LINKER MATCHES "lld-link.exe") +set(LLVM_PARALLEL_LINK_JOBS "" CACHE STRING + "Define the maximum number of concurrent link jobs.") +if(LLVM_PARALLEL_LINK_JOBS) + if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja") + message(WARNING "Job pooling is only available with Ninja generators.") + else() + set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${LLVM_PARALLEL_LINK_JOBS}) + set(CMAKE_JOB_POOL_LINK link_job_pool) + endif() +endif() + + +if (LINKER_IS_LLD_LINK) # Pass /MANIFEST:NO so that CMake doesn't run mt.exe on our binaries. Adding # manifests with mt.exe breaks LLD's symbol tables and takes as much time as # the link. See PR24476. @@ -223,10 +252,10 @@ if( MSVC_IDE ) "Number of parallel compiler jobs. 0 means use all processors. 
Default is 0.") if( NOT LLVM_COMPILER_JOBS STREQUAL "1" ) if( LLVM_COMPILER_JOBS STREQUAL "0" ) - add_llvm_definitions( /MP ) + add_definitions( /MP ) else() message(STATUS "Number of parallel compiler jobs set to " ${LLVM_COMPILER_JOBS}) - add_llvm_definitions( /MP${LLVM_COMPILER_JOBS} ) + add_definitions( /MP${LLVM_COMPILER_JOBS} ) endif() else() message(STATUS "Parallel compilation disabled") @@ -255,17 +284,17 @@ if( MSVC ) if( CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.0 ) # For MSVC 2013, disable iterator null pointer checking in debug mode, # especially so std::equal(nullptr, nullptr, nullptr) will not assert. - add_llvm_definitions("-D_DEBUG_POINTER_IMPL=") + add_definitions("-D_DEBUG_POINTER_IMPL=") endif() include(ChooseMSVCCRT) if( MSVC11 ) - add_llvm_definitions(-D_VARIADIC_MAX=10) + add_definitions(-D_VARIADIC_MAX=10) endif() # Add definitions that make MSVC much less annoying. - add_llvm_definitions( + add_definitions( # For some reason MS wants to deprecate a bunch of standard functions... -D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS @@ -276,94 +305,15 @@ if( MSVC ) ) # Tell MSVC to use the Unicode version of the Win32 APIs instead of ANSI. - add_llvm_definitions( + add_definitions( -DUNICODE -D_UNICODE ) - set(msvc_warning_flags - # Disabled warnings. 
- -wd4141 # Suppress ''modifier' : used more than once' (because of __forceinline combined with inline) - -wd4146 # Suppress 'unary minus operator applied to unsigned type, result still unsigned' - -wd4180 # Suppress 'qualifier applied to function type has no meaning; ignored' - -wd4244 # Suppress ''argument' : conversion from 'type1' to 'type2', possible loss of data' - -wd4258 # Suppress ''var' : definition from the for loop is ignored; the definition from the enclosing scope is used' - -wd4267 # Suppress ''var' : conversion from 'size_t' to 'type', possible loss of data' - -wd4291 # Suppress ''declaration' : no matching operator delete found; memory will not be freed if initialization throws an exception' - -wd4345 # Suppress 'behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized' - -wd4351 # Suppress 'new behavior: elements of array 'array' will be default initialized' - -wd4355 # Suppress ''this' : used in base member initializer list' - -wd4456 # Suppress 'declaration of 'var' hides local variable' - -wd4457 # Suppress 'declaration of 'var' hides function parameter' - -wd4458 # Suppress 'declaration of 'var' hides class member' - -wd4459 # Suppress 'declaration of 'var' hides global declaration' - -wd4503 # Suppress ''identifier' : decorated name length exceeded, name was truncated' - -wd4624 # Suppress ''derived class' : destructor could not be generated because a base class destructor is inaccessible' - -wd4722 # Suppress 'function' : destructor never returns, potential memory leak - -wd4800 # Suppress ''type' : forcing value to bool 'true' or 'false' (performance warning)' - -wd4100 # Suppress 'unreferenced formal parameter' - -wd4127 # Suppress 'conditional expression is constant' - -wd4512 # Suppress 'assignment operator could not be generated' - -wd4505 # Suppress 'unreferenced local function has been removed' - -wd4610 # Suppress '<class> can never be instantiated' - -wd4510 # Suppress 'default 
constructor could not be generated' - -wd4702 # Suppress 'unreachable code' - -wd4245 # Suppress 'signed/unsigned mismatch' - -wd4706 # Suppress 'assignment within conditional expression' - -wd4310 # Suppress 'cast truncates constant value' - -wd4701 # Suppress 'potentially uninitialized local variable' - -wd4703 # Suppress 'potentially uninitialized local pointer variable' - -wd4389 # Suppress 'signed/unsigned mismatch' - -wd4611 # Suppress 'interaction between '_setjmp' and C++ object destruction is non-portable' - -wd4805 # Suppress 'unsafe mix of type <type> and type <type> in operation' - -wd4204 # Suppress 'nonstandard extension used : non-constant aggregate initializer' - -wd4577 # Suppress 'noexcept used with no exception handling mode specified; termination on exception is not guaranteed' - -wd4091 # Suppress 'typedef: ignored on left of '' when no variable is declared' - # C4592 is disabled because of false positives in Visual Studio 2015 - # Update 1. Re-evaluate the usefulness of this diagnostic with Update 2. - -wd4592 # Suppress ''var': symbol will be dynamically initialized (implementation limitation) - -wd4319 # Suppress ''operator' : zero extending 'type' to 'type' of greater size' - - # Ideally, we'd like this warning to be enabled, but MSVC 2013 doesn't - # support the 'aligned' attribute in the way that clang sources requires (for - # any code that uses the LLVM_ALIGNAS macro), so this is must be disabled to - # avoid unwanted alignment warnings. - # When we switch to requiring a version of MSVC that supports the 'alignas' - # specifier (MSVC 2015?) this warning can be re-enabled. - -wd4324 # Suppress 'structure was padded due to __declspec(align())' - - # Promoted warnings. - -w14062 # Promote 'enumerator in switch of enum is not handled' to level 1 warning. - - # Promoted warnings to errors. - -we4238 # Promote 'nonstandard extension used : class rvalue used as lvalue' to error. 
- ) - - # Enable warnings - if (LLVM_ENABLE_WARNINGS) - # Put /W4 in front of all the -we flags. cl.exe doesn't care, but for - # clang-cl having /W4 after the -we flags will re-enable the warnings - # disabled by -we. - set(msvc_warning_flags "/W4 ${msvc_warning_flags}") - # CMake appends /W3 by default, and having /W3 followed by /W4 will result in - # cl : Command line warning D9025 : overriding '/W3' with '/W4'. Since this is - # a command line warning and not a compiler warning, it cannot be suppressed except - # by fixing the command line. - string(REGEX REPLACE " /W[0-4]" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") - string(REGEX REPLACE " /W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - - if (LLVM_ENABLE_PEDANTIC) - # No MSVC equivalent available - endif (LLVM_ENABLE_PEDANTIC) - endif (LLVM_ENABLE_WARNINGS) if (LLVM_ENABLE_WERROR) - append("/WX" msvc_warning_flags) + append("/WX" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) endif (LLVM_ENABLE_WERROR) - foreach(flag ${msvc_warning_flags}) - append("${flag}" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) - endforeach(flag) - append("/Zc:inline" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) # /Zc:strictStrings is incompatible with VS12's (Visual Studio 2013's) @@ -383,11 +333,13 @@ if( MSVC ) # "Enforce type conversion rules". append("/Zc:rvalueCast" CMAKE_CXX_FLAGS) - if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT LLVM_ENABLE_LTO) # clang-cl and cl by default produce non-deterministic binaries because # link.exe /incremental requires a timestamp in the .obj file. clang-cl # has the flag /Brepro to force deterministic binaries. We want to pass that - # whenever you're building with clang unless you're passing /incremental. + # whenever you're building with clang unless you're passing /incremental + # or using LTO (/Brepro with LTO would result in a warning about the flag + # being unused, because we're not generating object files). 
# This checks CMAKE_CXX_COMPILER_ID in addition to check_cxx_compiler_flag() # because cl.exe does not emit an error on flags it doesn't understand, # letting check_cxx_compiler_flag() claim it understands all flags. @@ -411,63 +363,6 @@ if( MSVC ) endif() elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) - if (LLVM_ENABLE_WARNINGS) - append("-Wall -W -Wno-unused-parameter -Wwrite-strings" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) - append("-Wcast-qual" CMAKE_CXX_FLAGS) - - # Turn off missing field initializer warnings for gcc to avoid noise from - # false positives with empty {}. Turn them on otherwise (they're off by - # default for clang). - check_cxx_compiler_flag("-Wmissing-field-initializers" CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG) - if (CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG) - if (CMAKE_COMPILER_IS_GNUCXX) - append("-Wno-missing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) - else() - append("-Wmissing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) - endif() - endif() - - append_if(LLVM_ENABLE_PEDANTIC "-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) - append_if(LLVM_ENABLE_PEDANTIC "-Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) - add_flag_if_supported("-Wcovered-switch-default" COVERED_SWITCH_DEFAULT_FLAG) - append_if(USE_NO_UNINITIALIZED "-Wno-uninitialized" CMAKE_CXX_FLAGS) - append_if(USE_NO_MAYBE_UNINITIALIZED "-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS) - - # Check if -Wnon-virtual-dtor warns even though the class is marked final. - # If it does, don't add it. So it won't be added on clang 3.4 and older. - # This also catches cases when -Wnon-virtual-dtor isn't supported by - # the compiler at all. This flag is not activated for gcc since it will - # incorrectly identify a protected non-virtual base when there is a friend - # declaration. 
- if (NOT CMAKE_COMPILER_IS_GNUCXX) - set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) - set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11 -Werror=non-virtual-dtor") - CHECK_CXX_SOURCE_COMPILES("class base {public: virtual void anchor();protected: ~base();}; - class derived final : public base { public: ~derived();}; - int main() { return 0; }" - CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR) - set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) - append_if(CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR - "-Wnon-virtual-dtor" CMAKE_CXX_FLAGS) - endif() - - # Enable -Wdelete-non-virtual-dtor if available. - add_flag_if_supported("-Wdelete-non-virtual-dtor" DELETE_NON_VIRTUAL_DTOR_FLAG) - - # Check if -Wcomment is OK with an // comment ending with '\' if the next - # line is also a // comment. - set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) - set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror -Wcomment") - CHECK_C_SOURCE_COMPILES("// \\\\\\n//\\nint main() {return 0;}" - C_WCOMMENT_ALLOWS_LINE_WRAP) - set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) - if (NOT C_WCOMMENT_ALLOWS_LINE_WRAP) - append("-Wno-comment" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) - endif() - - # Enable -Wstring-conversion to catch misuse of string literals. - add_flag_if_supported("-Wstring-conversion" STRING_CONVERSION_FLAG) - endif (LLVM_ENABLE_WARNINGS) append_if(LLVM_ENABLE_WERROR "-Werror" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) add_flag_if_supported("-Werror=date-time" WERROR_DATE_TIME) if (LLVM_ENABLE_CXX1Y) @@ -521,6 +416,151 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) endif(LLVM_ENABLE_MODULES) endif( MSVC ) +if (MSVC AND NOT CLANG_CL) + set(msvc_warning_flags + # Disabled warnings. 
+ -wd4141 # Suppress ''modifier' : used more than once' (because of __forceinline combined with inline) + -wd4146 # Suppress 'unary minus operator applied to unsigned type, result still unsigned' + -wd4180 # Suppress 'qualifier applied to function type has no meaning; ignored' + -wd4244 # Suppress ''argument' : conversion from 'type1' to 'type2', possible loss of data' + -wd4258 # Suppress ''var' : definition from the for loop is ignored; the definition from the enclosing scope is used' + -wd4267 # Suppress ''var' : conversion from 'size_t' to 'type', possible loss of data' + -wd4291 # Suppress ''declaration' : no matching operator delete found; memory will not be freed if initialization throws an exception' + -wd4345 # Suppress 'behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized' + -wd4351 # Suppress 'new behavior: elements of array 'array' will be default initialized' + -wd4355 # Suppress ''this' : used in base member initializer list' + -wd4456 # Suppress 'declaration of 'var' hides local variable' + -wd4457 # Suppress 'declaration of 'var' hides function parameter' + -wd4458 # Suppress 'declaration of 'var' hides class member' + -wd4459 # Suppress 'declaration of 'var' hides global declaration' + -wd4503 # Suppress ''identifier' : decorated name length exceeded, name was truncated' + -wd4624 # Suppress ''derived class' : destructor could not be generated because a base class destructor is inaccessible' + -wd4722 # Suppress 'function' : destructor never returns, potential memory leak + -wd4800 # Suppress ''type' : forcing value to bool 'true' or 'false' (performance warning)' + -wd4100 # Suppress 'unreferenced formal parameter' + -wd4127 # Suppress 'conditional expression is constant' + -wd4512 # Suppress 'assignment operator could not be generated' + -wd4505 # Suppress 'unreferenced local function has been removed' + -wd4610 # Suppress ' can never be instantiated' + -wd4510 # Suppress 'default 
constructor could not be generated' + -wd4702 # Suppress 'unreachable code' + -wd4245 # Suppress 'signed/unsigned mismatch' + -wd4706 # Suppress 'assignment within conditional expression' + -wd4310 # Suppress 'cast truncates constant value' + -wd4701 # Suppress 'potentially uninitialized local variable' + -wd4703 # Suppress 'potentially uninitialized local pointer variable' + -wd4389 # Suppress 'signed/unsigned mismatch' + -wd4611 # Suppress 'interaction between '_setjmp' and C++ object destruction is non-portable' + -wd4805 # Suppress 'unsafe mix of type and type in operation' + -wd4204 # Suppress 'nonstandard extension used : non-constant aggregate initializer' + -wd4577 # Suppress 'noexcept used with no exception handling mode specified; termination on exception is not guaranteed' + -wd4091 # Suppress 'typedef: ignored on left of '' when no variable is declared' + # C4592 is disabled because of false positives in Visual Studio 2015 + # Update 1. Re-evaluate the usefulness of this diagnostic with Update 2. + -wd4592 # Suppress ''var': symbol will be dynamically initialized (implementation limitation) + -wd4319 # Suppress ''operator' : zero extending 'type' to 'type' of greater size' + + # Ideally, we'd like this warning to be enabled, but MSVC 2013 doesn't + # support the 'aligned' attribute in the way that clang sources require (for + # any code that uses the LLVM_ALIGNAS macro), so this must be disabled to + # avoid unwanted alignment warnings. + # When we switch to requiring a version of MSVC that supports the 'alignas' + # specifier (MSVC 2015?) this warning can be re-enabled. + -wd4324 # Suppress 'structure was padded due to __declspec(align())' + + # Promoted warnings. + -w14062 # Promote 'enumerator in switch of enum is not handled' to level 1 warning. + + # Promoted warnings to errors. + -we4238 # Promote 'nonstandard extension used : class rvalue used as lvalue' to error. 
+ ) + + # Enable warnings + if (LLVM_ENABLE_WARNINGS) + # Put /W4 in front of all the -we flags. cl.exe doesn't care, but for + # clang-cl having /W4 after the -we flags will re-enable the warnings + # disabled by -we. + set(msvc_warning_flags "/W4 ${msvc_warning_flags}") + # CMake appends /W3 by default, and having /W3 followed by /W4 will result in + # cl : Command line warning D9025 : overriding '/W3' with '/W4'. Since this is + # a command line warning and not a compiler warning, it cannot be suppressed except + # by fixing the command line. + string(REGEX REPLACE " /W[0-4]" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + string(REGEX REPLACE " /W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + + if (LLVM_ENABLE_PEDANTIC) + # No MSVC equivalent available + endif (LLVM_ENABLE_PEDANTIC) + endif (LLVM_ENABLE_WARNINGS) + + foreach(flag ${msvc_warning_flags}) + append("${flag}" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + endforeach(flag) +endif (MSVC AND NOT CLANG_CL) + +if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL)) + append("-Wall -W -Wno-unused-parameter -Wwrite-strings" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + append("-Wcast-qual" CMAKE_CXX_FLAGS) + + # Turn off missing field initializer warnings for gcc to avoid noise from + # false positives with empty {}. Turn them on otherwise (they're off by + # default for clang). 
+ check_cxx_compiler_flag("-Wmissing-field-initializers" CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG) + if (CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG) + if (CMAKE_COMPILER_IS_GNUCXX) + append("-Wno-missing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + else() + append("-Wmissing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + endif() + endif() + + if (LLVM_ENABLE_PEDANTIC AND LLVM_COMPILER_IS_GCC_COMPATIBLE) + append("-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + append("-Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + endif() + + add_flag_if_supported("-Wcovered-switch-default" COVERED_SWITCH_DEFAULT_FLAG) + append_if(USE_NO_UNINITIALIZED "-Wno-uninitialized" CMAKE_CXX_FLAGS) + append_if(USE_NO_MAYBE_UNINITIALIZED "-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS) + + # Check if -Wnon-virtual-dtor warns even though the class is marked final. + # If it does, don't add it. So it won't be added on clang 3.4 and older. + # This also catches cases when -Wnon-virtual-dtor isn't supported by + # the compiler at all. This flag is not activated for gcc since it will + # incorrectly identify a protected non-virtual base when there is a friend + # declaration. Don't activate this in general on Windows as this warning has + # too many false positives on COM-style classes, which are destroyed with + # Release() (PR32286). + if (NOT CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32) + set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11 -Werror=non-virtual-dtor") + CHECK_CXX_SOURCE_COMPILES("class base {public: virtual void anchor();protected: ~base();}; + class derived final : public base { public: ~derived();}; + int main() { return 0; }" + CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR) + set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) + append_if(CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR + "-Wnon-virtual-dtor" CMAKE_CXX_FLAGS) + endif() + + # Enable -Wdelete-non-virtual-dtor if available. 
+ add_flag_if_supported("-Wdelete-non-virtual-dtor" DELETE_NON_VIRTUAL_DTOR_FLAG) + + # Check if -Wcomment is OK with an // comment ending with '\' if the next + # line is also a // comment. + set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror -Wcomment") + CHECK_C_SOURCE_COMPILES("// \\\\\\n//\\nint main() {return 0;}" + C_WCOMMENT_ALLOWS_LINE_WRAP) + set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) + if (NOT C_WCOMMENT_ALLOWS_LINE_WRAP) + append("-Wno-comment" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + endif() + + # Enable -Wstring-conversion to catch misuse of string literals. + add_flag_if_supported("-Wstring-conversion" STRING_CONVERSION_FLAG) +endif (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL)) + macro(append_common_sanitizer_flags) if (NOT MSVC) # Append -fno-omit-frame-pointer and turn on debug info to get better @@ -537,7 +577,7 @@ macro(append_common_sanitizer_flags) elseif (CLANG_CL) # Keep frame pointers around. append("/Oy-" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) - if (CMAKE_LINKER MATCHES "lld-link.exe") + if (LINKER_IS_LLD_LINK) # Use DWARF debug info with LLD. 
append("-gdwarf" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) else() @@ -565,8 +605,11 @@ if(LLVM_USE_SANITIZER) append_common_sanitizer_flags() append("-fsanitize=undefined -fno-sanitize=vptr,function -fno-sanitize-recover=all" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) - append("-fsanitize-blacklist=${CMAKE_SOURCE_DIR}/utils/sanitizers/ubsan_blacklist.txt" - CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + set(BLACKLIST_FILE "${CMAKE_SOURCE_DIR}/utils/sanitizers/ubsan_blacklist.txt") + if (EXISTS "${BLACKLIST_FILE}") + append("-fsanitize-blacklist=${BLACKLIST_FILE}" + CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + endif() elseif (LLVM_USE_SANITIZER STREQUAL "Thread") append_common_sanitizer_flags() append("-fsanitize=thread" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) @@ -602,9 +645,9 @@ if(LLVM_USE_SPLIT_DWARF) add_definitions("-gsplit-dwarf") endif() -add_llvm_definitions( -D__STDC_CONSTANT_MACROS ) -add_llvm_definitions( -D__STDC_FORMAT_MACROS ) -add_llvm_definitions( -D__STDC_LIMIT_MACROS ) +add_definitions( -D__STDC_CONSTANT_MACROS ) +add_definitions( -D__STDC_FORMAT_MACROS ) +add_definitions( -D__STDC_LIMIT_MACROS ) # clang doesn't print colored diagnostics when invoked from Ninja if (UNIX AND @@ -672,20 +715,38 @@ append_if(LLVM_BUILD_INSTRUMENTED_COVERAGE "-fprofile-instr-generate='${LLVM_PRO set(LLVM_ENABLE_LTO OFF CACHE STRING "Build LLVM with LTO. May be specified as Thin or Full to use a particular kind of LTO") string(TOUPPER "${LLVM_ENABLE_LTO}" uppercase_LLVM_ENABLE_LTO) +if(LLVM_ENABLE_LTO AND LLVM_ON_WIN32 AND NOT LINKER_IS_LLD_LINK) + message(FATAL_ERROR "When compiling for Windows, LLVM_ENABLE_LTO requires using lld as the linker (point CMAKE_LINKER at lld-link.exe)") +endif() if(uppercase_LLVM_ENABLE_LTO STREQUAL "THIN") - append("-flto=thin" CMAKE_CXX_FLAGS CMAKE_C_FLAGS - CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) - # On darwin, enable the lto cache. This improves initial build time a little - # since we re-link a lot of the same objects, and significantly improves - # incremental build time. 
- append_if(APPLE "-Wl,-cache_path_lto,${PROJECT_BINARY_DIR}/lto.cache" - CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) + append("-flto=thin" CMAKE_CXX_FLAGS CMAKE_C_FLAGS) + if(NOT LINKER_IS_LLD_LINK) + append("-flto=thin" CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) + endif() + # If the linker supports it, enable the lto cache. This improves initial build + # time a little since we re-link a lot of the same objects, and significantly + # improves incremental build time. + # FIXME: We should move all this logic into the clang driver. + if(APPLE) + append("-Wl,-cache_path_lto,${PROJECT_BINARY_DIR}/lto.cache" + CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) + elseif(UNIX AND LLVM_USE_LINKER STREQUAL "lld") + append("-Wl,--thinlto-cache-dir=${PROJECT_BINARY_DIR}/lto.cache" + CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) + elseif(LLVM_USE_LINKER STREQUAL "gold") + append("-Wl,--plugin-opt,cache-dir=${PROJECT_BINARY_DIR}/lto.cache" + CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) + endif() elseif(uppercase_LLVM_ENABLE_LTO STREQUAL "FULL") - append("-flto=full" CMAKE_CXX_FLAGS CMAKE_C_FLAGS - CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) + append("-flto=full" CMAKE_CXX_FLAGS CMAKE_C_FLAGS) + if(NOT LINKER_IS_LLD_LINK) + append("-flto=full" CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) + endif() elseif(LLVM_ENABLE_LTO) - append("-flto" CMAKE_CXX_FLAGS CMAKE_C_FLAGS - CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) + append("-flto" CMAKE_CXX_FLAGS CMAKE_C_FLAGS) + if(NOT LINKER_IS_LLD_LINK) + append("-flto" CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) + endif() endif() # This option makes utils/extract_symbols.py be used to determine the list of @@ -712,3 +773,16 @@ if(WIN32 OR CYGWIN) else() set(LLVM_ENABLE_PLUGINS ON) endif() + +function(get_compile_definitions) + get_directory_property(top_dir_definitions DIRECTORY ${CMAKE_SOURCE_DIR} COMPILE_DEFINITIONS) + foreach(definition ${top_dir_definitions}) + if(DEFINED result) + string(APPEND 
result " -D${definition}") + else() + set(result "-D${definition}") + endif() + endforeach() + set(LLVM_DEFINITIONS "${result}" PARENT_SCOPE) +endfunction() +get_compile_definitions() diff --git a/cmake/modules/LLVM-Config.cmake b/cmake/modules/LLVM-Config.cmake index 725178ab57b171ed8f3d737f21d2418251335aca..52330151065b69c0d7e7c1b51f0874061ae029c7 100644 --- a/cmake/modules/LLVM-Config.cmake +++ b/cmake/modules/LLVM-Config.cmake @@ -8,27 +8,61 @@ function(link_system_libs target) message(AUTHOR_WARNING "link_system_libs no longer needed") endfunction() - +# is_llvm_target_library( +# library +# Name of the LLVM library to check +# return_var +# Output variable name +# ALL_TARGETS;INCLUDED_TARGETS;OMITTED_TARGETS +# ALL_TARGETS - default looks at the full list of known targets +# INCLUDED_TARGETS - looks only at targets being configured +# OMITTED_TARGETS - looks only at targets that are not being configured +# ) function(is_llvm_target_library library return_var) + cmake_parse_arguments(ARG "ALL_TARGETS;INCLUDED_TARGETS;OMITTED_TARGETS" "" "" ${ARGN}) # Sets variable `return_var' to ON if `library' corresponds to a # LLVM supported target. To OFF if it doesn't. 
set(${return_var} OFF PARENT_SCOPE) string(TOUPPER "${library}" capitalized_lib) - string(TOUPPER "${LLVM_ALL_TARGETS}" targets) + if(ARG_INCLUDED_TARGETS) + string(TOUPPER "${LLVM_TARGETS_TO_BUILD}" targets) + elseif(ARG_OMITTED_TARGETS) + set(omitted_targets ${LLVM_ALL_TARGETS}) + list(REMOVE_ITEM omitted_targets ${LLVM_TARGETS_TO_BUILD}) + string(TOUPPER "${omitted_targets}" targets) + else() + string(TOUPPER "${LLVM_ALL_TARGETS}" targets) + endif() foreach(t ${targets}) if( capitalized_lib STREQUAL t OR - capitalized_lib STREQUAL "LLVM${t}" OR - capitalized_lib STREQUAL "LLVM${t}CODEGEN" OR - capitalized_lib STREQUAL "LLVM${t}ASMPARSER" OR - capitalized_lib STREQUAL "LLVM${t}ASMPRINTER" OR - capitalized_lib STREQUAL "LLVM${t}DISASSEMBLER" OR - capitalized_lib STREQUAL "LLVM${t}INFO" ) + capitalized_lib STREQUAL "${t}" OR + capitalized_lib STREQUAL "${t}DESC" OR + capitalized_lib STREQUAL "${t}CODEGEN" OR + capitalized_lib STREQUAL "${t}ASMPARSER" OR + capitalized_lib STREQUAL "${t}ASMPRINTER" OR + capitalized_lib STREQUAL "${t}DISASSEMBLER" OR + capitalized_lib STREQUAL "${t}INFO" OR + capitalized_lib STREQUAL "${t}UTILS" ) set(${return_var} ON PARENT_SCOPE) break() endif() endforeach() endfunction(is_llvm_target_library) +function(is_llvm_target_specifier library return_var) + is_llvm_target_library(${library} ${return_var} ${ARGN}) + string(TOUPPER "${library}" capitalized_lib) + if(NOT ${return_var}) + if( capitalized_lib STREQUAL "ALLTARGETSASMPARSERS" OR + capitalized_lib STREQUAL "ALLTARGETSDESCS" OR + capitalized_lib STREQUAL "ALLTARGETSDISASSEMBLERS" OR + capitalized_lib STREQUAL "ALLTARGETSINFOS" OR + capitalized_lib STREQUAL "NATIVE" OR + capitalized_lib STREQUAL "NATIVECODEGEN" ) + set(${return_var} ON PARENT_SCOPE) + endif() + endif() +endfunction() macro(llvm_config executable) cmake_parse_arguments(ARG "USE_SHARED" "" "" ${ARGN}) @@ -93,6 +127,21 @@ function(llvm_map_components_to_libnames out_libs) endif() string(TOUPPER "${LLVM_AVAILABLE_LIBS}" 
capitalized_libs) + get_property(LLVM_TARGETS_CONFIGURED GLOBAL PROPERTY LLVM_TARGETS_CONFIGURED) + + # Generally in our build system we avoid order-dependence. Unfortunately since + # not all targets create the same set of libraries we actually need to ensure + # that all build targets associated with a target are added before we can + # process target dependencies. + if(NOT LLVM_TARGETS_CONFIGURED) + foreach(c ${link_components}) + is_llvm_target_specifier(${c} iltl_result ALL_TARGETS) + if(iltl_result) + message(FATAL_ERROR "Specified target library before target registration is complete.") + endif() + endforeach() + endif() + # Expand some keywords: list(FIND LLVM_TARGETS_TO_BUILD "${LLVM_NATIVE_ARCH}" have_native_backend) list(FIND link_components "engine" engine_required) @@ -141,6 +190,12 @@ function(llvm_map_components_to_libnames out_libs) if( TARGET LLVM${c}Disassembler ) list(APPEND expanded_components "LLVM${c}Disassembler") endif() + if( TARGET LLVM${c}Info ) + list(APPEND expanded_components "LLVM${c}Info") + endif() + if( TARGET LLVM${c}Utils ) + list(APPEND expanded_components "LLVM${c}Utils") + endif() elseif( c STREQUAL "native" ) # already processed elseif( c STREQUAL "nativecodegen" ) @@ -198,9 +253,16 @@ function(llvm_map_components_to_libnames out_libs) list(FIND capitalized_libs LLVM${capitalized} lib_idx) if( lib_idx LESS 0 ) # The component is unknown. Maybe is an omitted target? - is_llvm_target_library(${c} iltl_result) - if( NOT iltl_result ) - message(FATAL_ERROR "Library `${c}' not found in list of llvm libraries.") + is_llvm_target_library(${c} iltl_result OMITTED_TARGETS) + if(iltl_result) + # A missing library to a directly referenced omitted target would be bad. + message(FATAL_ERROR "Library '${c}' is a direct reference to a target library for an omitted target.") + else() + # If it is not an omitted target we should assume it is a component + # that hasn't yet been processed by CMake. 
Missing components will + # cause errors later in the configuration, so we can safely assume + # that this is valid here. + list(APPEND expanded_components LLVM${c}) endif() else( lib_idx LESS 0 ) list(GET LLVM_AVAILABLE_LIBS ${lib_idx} canonical_lib) diff --git a/cmake/modules/LLVMConfig.cmake.in b/cmake/modules/LLVMConfig.cmake.in index c30c92b66d8af9bf535c6651ff2ebe8be4b7fdff..7a8eb3674720adf05dabca3ba05774d6c05f9b18 100644 --- a/cmake/modules/LLVMConfig.cmake.in +++ b/cmake/modules/LLVMConfig.cmake.in @@ -45,6 +45,10 @@ set(LLVM_ENABLE_PIC @LLVM_ENABLE_PIC@) set(LLVM_BUILD_32_BITS @LLVM_BUILD_32_BITS@) +if (NOT "@LLVM_PTHREAD_LIB@" STREQUAL "") + set(LLVM_PTHREAD_LIB "@LLVM_PTHREAD_LIB@") +endif() + set(LLVM_ENABLE_PLUGINS @LLVM_ENABLE_PLUGINS@) set(LLVM_EXPORT_SYMBOLS_FOR_PLUGINS @LLVM_EXPORT_SYMBOLS_FOR_PLUGINS@) set(LLVM_PLUGIN_EXT @LLVM_PLUGIN_EXT@) @@ -75,4 +79,5 @@ if(NOT TARGET LLVMSupport) @llvm_config_include_buildtree_only_exports@ endif() +set_property(GLOBAL PROPERTY LLVM_TARGETS_CONFIGURED On) include(${LLVM_CMAKE_DIR}/LLVM-Config.cmake) diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake index 9682002c2abd93e76e968163be0f9ee84e412698..da0858e54d441d6ca892dd91d1708486c21a14db 100644 --- a/cmake/modules/TableGen.cmake +++ b/cmake/modules/TableGen.cmake @@ -23,6 +23,13 @@ function(tablegen project ofn) set(LLVM_TARGET_DEFINITIONS_ABSOLUTE ${CMAKE_CURRENT_SOURCE_DIR}/${LLVM_TARGET_DEFINITIONS}) endif() + if (LLVM_ENABLE_DAGISEL_COV) + list(FIND ARGN "-gen-dag-isel" idx) + if( NOT idx EQUAL -1 ) + list(APPEND LLVM_TABLEGEN_FLAGS "-instrument-coverage") + endif() + endif() + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp # Generate tablegen output in a temporary file. 
COMMAND ${${project}_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR} @@ -92,7 +99,7 @@ macro(add_tablegen target project) set(LLVM_ENABLE_OBJLIB ON) endif() - add_llvm_utility(${target} ${ARGN}) + add_llvm_executable(${target} DISABLE_LLVM_LINK_LLVM_DYLIB ${ARGN}) set(LLVM_LINK_COMPONENTS ${${target}_OLD_LLVM_LINK_COMPONENTS}) set(${project}_TABLEGEN "${target}" CACHE diff --git a/cmake/modules/VersionFromVCS.cmake b/cmake/modules/VersionFromVCS.cmake index 8d56b66fa4781afd0dfbbdfb1bfdbf5688983fc1..e92540991a1092d7d15daa496456753857680156 100644 --- a/cmake/modules/VersionFromVCS.cmake +++ b/cmake/modules/VersionFromVCS.cmake @@ -28,10 +28,11 @@ function(add_version_info_from_vcs VERS) elseif( EXISTS ${SOURCE_DIR}/.git ) set(result "${result}git") # Try to get a ref-id - if( EXISTS ${SOURCE_DIR}/.git/svn ) - find_program(git_executable NAMES git git.exe git.cmd) - if( git_executable ) - set(is_git_svn_rev_exact false) + find_program(git_executable NAMES git git.exe git.cmd) + + if( git_executable ) + if( EXISTS ${SOURCE_DIR}/.git/svn ) + # Get the repository URL execute_process(COMMAND ${git_executable} svn info WORKING_DIRECTORY ${SOURCE_DIR} @@ -43,42 +44,37 @@ function(add_version_info_from_vcs VERS) if(svn_url) set(LLVM_REPOSITORY ${CMAKE_MATCH_1} PARENT_SCOPE) endif() - - string(REGEX REPLACE "^(.*\n)?Revision: ([^\n]+).*" - "\\2" git_svn_rev_number "${git_output}") - set(SVN_REVISION ${git_svn_rev_number} PARENT_SCOPE) - set(git_svn_rev "-svn-${git_svn_rev}") - - # Determine if the HEAD points directly at a subversion revision. 
- execute_process(COMMAND ${git_executable} svn find-rev HEAD - WORKING_DIRECTORY ${SOURCE_DIR} - TIMEOUT 5 - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_output) - if( git_result EQUAL 0 ) - string(STRIP "${git_output}" git_head_svn_rev_number) - if( git_head_svn_rev_number EQUAL git_svn_rev_number ) - set(is_git_svn_rev_exact true) - endif() - endif() - else() - set(git_svn_rev "") endif() - execute_process(COMMAND - ${git_executable} rev-parse --short HEAD + + # Get the svn revision number for this git commit if one exists. + execute_process(COMMAND ${git_executable} svn find-rev HEAD WORKING_DIRECTORY ${SOURCE_DIR} TIMEOUT 5 RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_output) - - if( git_result EQUAL 0 AND NOT is_git_svn_rev_exact ) - string(STRIP "${git_output}" git_ref_id) - set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE) - set(result "${result}${git_svn_rev}-${git_ref_id}") + OUTPUT_VARIABLE git_head_svn_rev_number + OUTPUT_STRIP_TRAILING_WHITESPACE) + if( git_result EQUAL 0 AND git_output) + set(SVN_REVISION ${git_head_svn_rev_number} PARENT_SCOPE) + set(git_svn_rev "-svn-${git_head_svn_rev_number}") else() - set(result "${result}${git_svn_rev}") + set(git_svn_rev "") endif() + endif() + + # Get the git ref id + execute_process(COMMAND + ${git_executable} rev-parse --short HEAD + WORKING_DIRECTORY ${SOURCE_DIR} + TIMEOUT 5 + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_ref_id + OUTPUT_STRIP_TRAILING_WHITESPACE) + if( git_result EQUAL 0 ) + set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE) + set(result "${result}${git_svn_rev}-${git_ref_id}") + else() + set(result "${result}${git_svn_rev}") endif() endif() endif() diff --git a/cmake/platforms/iOS.cmake b/cmake/platforms/iOS.cmake index 99692fd6d2aaedd406db763aa7724ee475d36020..15c7aae12c702bfdbcdce50460bf8df3f3273366 100644 --- a/cmake/platforms/iOS.cmake +++ b/cmake/platforms/iOS.cmake @@ -4,6 +4,7 @@ SET(CMAKE_SYSTEM_NAME Darwin) SET(CMAKE_SYSTEM_VERSION 13) SET(CMAKE_CXX_COMPILER_WORKS True) 
SET(CMAKE_C_COMPILER_WORKS True) +SET(IOS True) if(NOT CMAKE_OSX_SYSROOT) execute_process(COMMAND xcodebuild -version -sdk iphoneos Path diff --git a/docs/AMDGPUUsage.rst b/docs/AMDGPUUsage.rst index 2c1a22762bce0556ceb051a400b85da50b3ec77c..5ff0f207f227b8f42c4a971e519f37e42d4b52a1 100644 --- a/docs/AMDGPUUsage.rst +++ b/docs/AMDGPUUsage.rst @@ -19,20 +19,73 @@ Address Spaces The AMDGPU back-end uses the following address space mapping: - ============= ============================================ - Address Space Memory Space - ============= ============================================ - 0 Private - 1 Global - 2 Constant - 3 Local - 4 Generic (Flat) - 5 Region - ============= ============================================ + ================== =================== ============== + LLVM Address Space DWARF Address Space Memory Space + ================== =================== ============== + 0 1 Private + 1 N/A Global + 2 N/A Constant + 3 2 Local + 4 N/A Generic (Flat) + 5 N/A Region + ================== =================== ============== The terminology in the table, aside from the region memory space, is from the OpenCL standard. +LLVM Address Space is used throughout LLVM (for example, in LLVM IR). DWARF +Address Space is emitted in DWARF, and is used by tools, such as debugger, +profiler and others. + +Trap Handler ABI +---------------- +The OS element of the target triple controls the trap handler behavior. + +HSA OS +^^^^^^ +For code objects generated by AMDGPU back-end for the HSA OS, the runtime +installs a trap handler that supports the s_trap instruction with the following +usage: + + +--------------+-------------+-------------------+----------------------------+ + |Usage |Code Sequence|Trap Handler Inputs|Description | + +==============+=============+===================+============================+ + |reserved |s_trap 0x00 | |Reserved by hardware. 
| + +--------------+-------------+-------------------+----------------------------+ + |HSA debugtrap |s_trap 0x01 |SGPR0-1: queue_ptr |Reserved for HSA debugtrap | + |(arg) | |VGPR0: arg |intrinsic (not implemented).| + +--------------+-------------+-------------------+----------------------------+ + |llvm.trap |s_trap 0x02 |SGPR0-1: queue_ptr |Causes dispatch to be | + | | | |terminated and its | + | | | |associated queue put into | + | | | |the error state. | + +--------------+-------------+-------------------+----------------------------+ + |llvm.debugtrap| s_trap 0x03 |SGPR0-1: queue_ptr |If debugger not installed | + | | | |handled same as llvm.trap. | + +--------------+-------------+-------------------+----------------------------+ + |debugger |s_trap 0x07 | |Reserved for debugger | + |breakpoint | | |breakpoints. | + +--------------+-------------+-------------------+----------------------------+ + |debugger |s_trap 0x08 | |Reserved for debugger. | + +--------------+-------------+-------------------+----------------------------+ + |debugger |s_trap 0xfe | |Reserved for debugger. | + +--------------+-------------+-------------------+----------------------------+ + |debugger |s_trap 0xff | |Reserved for debugger. | + +--------------+-------------+-------------------+----------------------------+ + +Non-HSA OS +^^^^^^^^^^ +For code objects generated by AMDGPU back-end for non-HSA OS, the runtime does +not install a trap handler. The llvm.trap and llvm.debugtrap instructions are +handled as follows: + + =============== ============= =============================================== + Usage Code Sequence Description + =============== ============= =============================================== + llvm.trap s_endpgm Causes wavefront to be terminated. + llvm.debugtrap s_nop No operation. Compiler warning generated that + there is no trap handler installed. 
+ =============== ============= =============================================== Assembler ========= diff --git a/docs/AliasAnalysis.rst b/docs/AliasAnalysis.rst index 02b749ffb9181e106c560a62f4f21921836ae5e0..e201333f30070fc821ef34132e1d70cfbca98deb 100644 --- a/docs/AliasAnalysis.rst +++ b/docs/AliasAnalysis.rst @@ -136,7 +136,7 @@ be overlapping in some way, but do not start at the same address. The ``MustAlias`` response may only be returned if the two memory objects are guaranteed to always start at exactly the same location. A ``MustAlias`` -response implies that the pointers compare equal. +response does not imply that the pointers compare equal. The ``getModRefInfo`` methods ----------------------------- diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst index 3c9aa1010704ce5b5b26254f81eb1067a3703ef9..a9a123595f7f5aa983201c33b1d5c1cb2525f95b 100644 --- a/docs/BitCodeFormat.rst +++ b/docs/BitCodeFormat.rst @@ -839,16 +839,6 @@ fields are * *unnamed_addr*: If present, an encoding of the :ref:`unnamed_addr` attribute of this alias -MODULE_CODE_PURGEVALS Record -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -``[PURGEVALS, numvals]`` - -The ``PURGEVALS`` record (code 10) resets the module-level value list to the -size given by the single operand value. Module-level value list items are added -by ``GLOBALVAR``, ``FUNCTION``, and ``ALIAS`` records. After a ``PURGEVALS`` -record is seen, new value indices will start from the given *numvals* value. - .. _MODULE_CODE_GCNAME: MODULE_CODE_GCNAME Record diff --git a/docs/BranchWeightMetadata.rst b/docs/BranchWeightMetadata.rst index 9e61d232d74b57a3367b1d93a1fdb5951331b96a..b941d0d1505064139d9875bc0fcc263e60c7cf22 100644 --- a/docs/BranchWeightMetadata.rst +++ b/docs/BranchWeightMetadata.rst @@ -123,11 +123,11 @@ To allow comparing different functions during inter-procedural analysis and optimization, ``MD_prof`` nodes can also be assigned to a function definition. 
The first operand is a string indicating the name of the associated counter. -Currently, one counter is supported: "function_entry_count". This is a 64-bit -counter that indicates the number of times that this function was invoked (in -the case of instrumentation-based profiles). In the case of sampling-based -profiles, this counter is an approximation of how many times the function was -invoked. +Currently, one counter is supported: "function_entry_count". The second operand +is a 64-bit counter that indicates the number of times that this function was +invoked (in the case of instrumentation-based profiles). In the case of +sampling-based profiles, this operand is an approximation of how many times +the function was invoked. For example, in the code below, the instrumentation for function foo() indicates that it was called 2,590 times at runtime. @@ -138,3 +138,13 @@ indicates that it was called 2,590 times at runtime. ret i32 0 } !1 = !{!"function_entry_count", i64 2590} + +If "function_entry_count" has more than 2 operands, the later operands are +the GUIDs of the functions that need to be imported by ThinLTO. This is only +set by sampling based profile. It is needed because the sampling based profile +was collected on a binary that had already imported and inlined these functions, +and we need to ensure the IR matches in the ThinLTO backends for profile +annotation. The reason why we cannot annotate this on the callsite is that it +can only go down 1 level in the call chain. For the cases where +foo_in_a_cc()->bar_in_b_cc()->baz_in_c_cc(), we will need to go down 2 levels +in the call chain to import both bar_in_b_cc and baz_in_c_cc. 
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst index 6e5a54a592ceef4c79ea7a4b20f1441f590718cf..106fc8456f616f05f7f51170fe3b59cc3ca0861a 100644 --- a/docs/CodeGenerator.rst +++ b/docs/CodeGenerator.rst @@ -1005,7 +1005,7 @@ The TableGen DAG instruction selector generator reads the instruction patterns in the ``.td`` file and automatically builds parts of the pattern matching code for your target. It has the following strengths: -* At compiler-compiler time, it analyzes your instruction patterns and tells you +* At compiler-compile time, it analyzes your instruction patterns and tells you if your patterns make sense or not. * It can handle arbitrary constraints on operands for the pattern match. In @@ -1026,7 +1026,7 @@ for your target. It has the following strengths: * Targets can define their own (and rely on built-in) "pattern fragments". Pattern fragments are chunks of reusable patterns that get inlined into your - patterns during compiler-compiler time. For example, the integer "``(not + patterns during compiler-compile time. For example, the integer "``(not x)``" operation is actually defined as a pattern fragment that expands as "``(xor x, -1)``", since the SelectionDAG does not have a native '``not``' operation. Targets can define their own short-hand fragments as they see fit. diff --git a/docs/CommandGuide/FileCheck.rst b/docs/CommandGuide/FileCheck.rst index 413b6f41b0cc25484f5861a636e54fb18e8b7979..8830c394b212fc343a1e6ae9cd87ccb9531dbf4a 100644 --- a/docs/CommandGuide/FileCheck.rst +++ b/docs/CommandGuide/FileCheck.rst @@ -77,6 +77,15 @@ OPTIONS -verify``. With this option FileCheck will verify that input does not contain warnings not covered by any ``CHECK:`` patterns. +.. option:: --enable-var-scope + + Enables scope for regex variables. + + Variables with names that start with ``$`` are considered global and + remain set throughout the file. + + All other variables get undefined after each encountered ``CHECK-LABEL``. + .. 
option:: -version Show the version number of this program. @@ -344,6 +353,9 @@ matched by the directive cannot also be matched by any other check present in other unique identifiers. Conceptually, the presence of ``CHECK-LABEL`` divides the input stream into separate blocks, each of which is processed independently, preventing a ``CHECK:`` directive in one block matching a line in another block. +If ``--enable-var-scope`` is in effect, all local variables are cleared at the +beginning of the block. + For example, .. code-block:: llvm @@ -436,6 +448,13 @@ were defined on. For example: Can be useful if you want the operands of ``op`` to be the same register, and don't care exactly which register it is. +If ``--enable-var-scope`` is in effect, variables with names that +start with ``$`` are considered to be global. All other variables are +local. All local variables get undefined at the beginning of each +CHECK-LABEL block. Global variables are not affected by CHECK-LABEL. +This makes it easier to ensure that individual tests are not affected +by variables set in preceding tests. + FileCheck Expressions ~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/CommandGuide/lit.rst b/docs/CommandGuide/lit.rst index 2e9054b1ce90836d6495e26298dd8c79e220d8dc..b8299d44d48ec552d8ff90769431baec236f8f0e 100644 --- a/docs/CommandGuide/lit.rst +++ b/docs/CommandGuide/lit.rst @@ -56,7 +56,7 @@ GENERAL OPTIONS Search for :file:`{NAME}.cfg` and :file:`{NAME}.site.cfg` when searching for test suites, instead of :file:`lit.cfg` and :file:`lit.site.cfg`. -.. option:: -D NAME, -D NAME=VALUE, --param NAME, --param NAME=VALUE +.. option:: -D NAME[=VALUE], --param NAME[=VALUE] Add a user defined parameter ``NAME`` with the given ``VALUE`` (or the empty string if not given). The meaning and use of these parameters is test suite @@ -379,7 +379,7 @@ PRE-DEFINED SUBSTITUTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~ :program:`lit` provides various patterns that can be used with the RUN command.
-These are defined in TestRunner.py. +These are defined in TestRunner.py. The base set of substitutions are: ========== ============== Macro Substitution @@ -391,17 +391,13 @@ These are defined in TestRunner.py. %t temporary file name unique to the test %T temporary directory unique to the test %% % - %/s same as %s but replace all / with \\ - %/S same as %S but replace all / with \\ - %/p same as %p but replace all / with \\ - %/t same as %t but replace all / with \\ - %/T same as %T but replace all / with \\ ========== ============== -Further substitution patterns might be defined by each test module. -See the modules :ref:`local-configuration-files`. +Other substitutions are provided that are variations on this base set and +further substitution patterns can be defined by each test module. See the +modules :ref:`local-configuration-files`. -More information on the testing infrastucture can be found in the +More detailed information on substitutions can be found in the :doc:`../TestingGuide`. TEST RUN OUTPUT FORMAT diff --git a/docs/CommandGuide/llvm-cov.rst b/docs/CommandGuide/llvm-cov.rst index 4c0354c0d608fe8f1c7164efee4c52d8ddd213c0..ea2e625bc4d27e675cbb1702b1cf1ee631767e68 100644 --- a/docs/CommandGuide/llvm-cov.rst +++ b/docs/CommandGuide/llvm-cov.rst @@ -322,6 +322,10 @@ OPTIONS universal binary or to use an architecture that does not match a non-universal binary. +.. option:: -show-functions + + Show coverage summaries for each function. + .. program:: llvm-cov export .. _llvm-cov-export: diff --git a/docs/CommandGuide/llvm-profdata.rst b/docs/CommandGuide/llvm-profdata.rst index bae0ff7d4ce07bae4b18f453f1188cb584261a5b..f7aa8309485b1be2eede390db1e52899cb99be57 100644 --- a/docs/CommandGuide/llvm-profdata.rst +++ b/docs/CommandGuide/llvm-profdata.rst @@ -196,6 +196,10 @@ OPTIONS Specify that the input profile is a sample-based profile. +.. option:: -memop-sizes + + Show the profiled sizes of the memory intrinsic calls for shown functions. 
+ EXIT STATUS ----------- diff --git a/docs/Coroutines.rst b/docs/Coroutines.rst index 0e7cde7aa38ba9457f90477128eae0d2b8fb51c4..f7a38577fe8eb603944593c6c84907267ad97730 100644 --- a/docs/Coroutines.rst +++ b/docs/Coroutines.rst @@ -89,7 +89,7 @@ and 6 after which the coroutine will be destroyed. The LLVM IR for this coroutine looks like this: -.. code-block:: none +.. code-block:: llvm define i8* @f(i32 %n) { entry: @@ -110,7 +110,7 @@ The LLVM IR for this coroutine looks like this: call void @free(i8* %mem) br label %suspend suspend: - call void @llvm.coro.end(i8* %hdl, i1 false) + %unused = call i1 @llvm.coro.end(i8* %hdl, i1 false) ret i8* %hdl } @@ -156,7 +156,7 @@ We also store addresses of the resume and destroy functions so that the when its identity cannot be determined statically at compile time. For our example, the coroutine frame will be: -.. code-block:: text +.. code-block:: llvm %f.frame = type { void (%f.frame*)*, void (%f.frame*)*, i32 } @@ -164,7 +164,7 @@ After resume and destroy parts are outlined, function `f` will contain only the code responsible for creation and initialization of the coroutine frame and execution of the coroutine until a suspend point is reached: -.. code-block:: none +.. code-block:: llvm define i8* @f(i32 %n) { entry: @@ -224,7 +224,7 @@ In the entry block, we will call `coro.alloc`_ intrinsic that will return `true` when dynamic allocation is required, and `false` if dynamic allocation is elided. -.. code-block:: none +.. code-block:: llvm entry: %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) @@ -242,7 +242,7 @@ In the cleanup block, we will make freeing the coroutine frame conditional on `coro.free`_ intrinsic. If allocation is elided, `coro.free`_ returns `null` thus skipping the deallocation code: -.. code-block:: text +.. 
code-block:: llvm cleanup: %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) @@ -286,7 +286,7 @@ Let's consider the coroutine that has more than one suspend point: Matching LLVM code would look like (with the rest of the code remaining the same as the code in the previous section): -.. code-block:: text +.. code-block:: llvm loop: %n.addr = phi i32 [ %n, %entry ], [ %inc, %loop.resume ] @@ -383,17 +383,17 @@ point when coroutine should be ready for resumption (namely, when a resume index should be stored in the coroutine frame, so that it can be resumed at the correct resume point): -.. code-block:: text +.. code-block:: llvm if.true: %save1 = call token @llvm.coro.save(i8* %hdl) - call void async_op1(i8* %hdl) + call void @async_op1(i8* %hdl) %suspend1 = call i1 @llvm.coro.suspend(token %save1, i1 false) switch i8 %suspend1, label %suspend [i8 0, label %resume1 i8 1, label %cleanup] if.false: %save2 = call token @llvm.coro.save(i8* %hdl) - call void async_op2(i8* %hdl) + call void @async_op2(i8* %hdl) %suspend2 = call i1 @llvm.coro.suspend(token %save2, i1 false) switch i8 %suspend1, label %suspend [i8 0, label %resume2 i8 1, label %cleanup] @@ -411,7 +411,7 @@ be used to communicate with the coroutine. This distinguished alloca is called The following coroutine designates a 32 bit integer `promise` and uses it to store the current value produced by a coroutine. -.. code-block:: text +.. code-block:: llvm define i8* @f(i32 %n) { entry: @@ -440,7 +440,7 @@ store the current value produced by a coroutine. call void @free(i8* %mem) br label %suspend suspend: - call void @llvm.coro.end(i8* %hdl, i1 false) + %unused = call i1 @llvm.coro.end(i8* %hdl, i1 false) ret i8* %hdl } @@ -692,7 +692,7 @@ a coroutine user are responsible to makes sure there is no data races. Example: """""""" -.. code-block:: text +.. code-block:: llvm define i8* @f(i32 %n) { entry: @@ -812,7 +812,7 @@ pointer that was returned by prior `coro.begin` call. 
Example (custom deallocation function): """"""""""""""""""""""""""""""""""""""" -.. code-block:: text +.. code-block:: llvm cleanup: %mem = call i8* @llvm.coro.free(token %id, i8* %frame) @@ -827,7 +827,7 @@ Example (custom deallocation function): Example (standard deallocation functions): """""""""""""""""""""""""""""""""""""""""" -.. code-block:: text +.. code-block:: llvm cleanup: %mem = call i8* @llvm.coro.free(token %id, i8* %frame) @@ -864,7 +864,7 @@ when possible. Example: """""""" -.. code-block:: text +.. code-block:: llvm entry: %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) @@ -955,41 +955,90 @@ A frontend should emit exactly one `coro.id` intrinsic per coroutine. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :: - declare void @llvm.coro.end(i8* , i1 ) + declare i1 @llvm.coro.end(i8* , i1 ) Overview: """"""""" The '``llvm.coro.end``' marks the point where execution of the resume part of -the coroutine should end and control returns back to the caller. +the coroutine should end and control should return to the caller. Arguments: """""""""" -The first argument should refer to the coroutine handle of the enclosing coroutine. +The first argument should refer to the coroutine handle of the enclosing +coroutine. A frontend is allowed to supply null as the first parameter, in this +case `coro-early` pass will replace the null with an appropriate coroutine +handle value. The second argument should be `true` if this coro.end is in the block that is -part of the unwind sequence leaving the coroutine body due to exception prior to -the first reaching any suspend points, and `false` otherwise. +part of the unwind sequence leaving the coroutine body due to an exception and +`false` otherwise. Semantics: """""""""" -The `coro.end`_ intrinsic is a no-op during an initial invocation of the -coroutine. When the coroutine resumes, the intrinsic marks the point when -coroutine need to return control back to the caller. 
+The purpose of this intrinsic is to allow frontends to mark the cleanup and +other code that is only relevant during the initial invocation of the coroutine +and should not be present in resume and destroy parts. -This intrinsic is removed by the CoroSplit pass when a coroutine is split into -the start, resume and destroy parts. In start part, the intrinsic is removed, -in resume and destroy parts, it is replaced with `ret void` instructions and +This intrinsic is lowered when a coroutine is split into +the start, resume and destroy parts. In the start part, it is a no-op, +in resume and destroy parts, it is replaced with `ret void` instruction and the rest of the block containing `coro.end` instruction is discarded. - In landing pads it is replaced with an appropriate instruction to unwind to -caller. +caller. The handling of coro.end differs depending on whether the target is +using landingpad or WinEH exception model. + +For landingpad based exception model, it is expected that frontend uses the +`coro.end`_ intrinsic as follows: + +.. code-block:: llvm + + ehcleanup: + %InResumePart = call i1 @llvm.coro.end(i8* null, i1 true) + br i1 %InResumePart, label %eh.resume, label %cleanup.cont -A frontend is allowed to supply null as the first parameter, in this case -`coro-early` pass will replace the null with an appropriate coroutine handle -value. + cleanup.cont: + ; rest of the cleanup + + eh.resume: + %exn = load i8*, i8** %exn.slot, align 8 + %sel = load i32, i32* %ehselector.slot, align 4 + %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 + %lpad.val29 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 + resume { i8*, i32 } %lpad.val29 + +The `CoroSplit` pass replaces `coro.end` with ``True`` in the resume functions, +thus leading to immediate unwind to the caller, whereas in the start function it +is replaced with ``False``, thus allowing to proceed to the rest of the cleanup +code that is only needed during initial invocation of the coroutine.
+ +For Windows Exception handling model, a frontend should attach a funclet bundle +referring to an enclosing cleanuppad as follows: + +.. code-block:: llvm + + ehcleanup: + %tok = cleanuppad within none [] + %unused = call i1 @llvm.coro.end(i8* null, i1 true) [ "funclet"(token %tok) ] + cleanupret from %tok unwind label %RestOfTheCleanup + +The `CoroSplit` pass, if the funclet bundle is present, will insert +``cleanupret from %tok unwind to caller`` before +the `coro.end`_ intrinsic and will remove the rest of the block. + +The following table summarizes the handling of `coro.end`_ intrinsic. + ++--------------------------+-------------------+-------------------------------+ +| | In Start Function | In Resume/Destroy Functions | ++--------------------------+-------------------+-------------------------------+ +|unwind=false | nothing |``ret void`` | ++------------+-------------+-------------------+-------------------------------+ +| | WinEH | nothing |``cleanupret unwind to caller``| +|unwind=true +-------------+-------------------+-------------------------------+ +| | Landingpad | nothing | nothing | ++------------+-------------+-------------------+-------------------------------+ .. _coro.suspend: .. _suspend points: @@ -1025,7 +1074,7 @@ basic blocks. Example (normal suspend point): """"""""""""""""""""""""""""""" -.. code-block:: text +.. code-block:: llvm %0 = call i8 @llvm.coro.suspend(token none, i1 false) switch i8 %0, label %suspend [i8 0, label %resume @@ -1034,7 +1083,7 @@ Example (normal suspend point): Example (final suspend point): """""""""""""""""""""""""""""" -.. code-block:: text +.. code-block:: llvm while.end: %s.final = call i8 @llvm.coro.suspend(token none, i1 true) @@ -1095,10 +1144,10 @@ In such a case, a coroutine should be ready for resumption prior to a call to a different thread possibly prior to `async_op` call returning control back to the coroutine: -.. code-block:: text +.. 
code-block:: llvm %save1 = call token @llvm.coro.save(i8* %hdl) - call void async_op1(i8* %hdl) + call void @async_op1(i8* %hdl) %suspend1 = call i1 @llvm.coro.suspend(token %save1, i1 false) switch i8 %suspend1, label %suspend [i8 0, label %resume1 i8 1, label %cleanup] diff --git a/docs/DeveloperPolicy.rst b/docs/DeveloperPolicy.rst index 9ec6fb84636f451900ea4ff731533c5ec09b5bec..97e0572343798a35191efd2fd3f1be4d72cbf74c 100644 --- a/docs/DeveloperPolicy.rst +++ b/docs/DeveloperPolicy.rst @@ -62,7 +62,7 @@ way to see what other people are interested in and watching the flow of the project as a whole. We recommend that active developers register an email account with `LLVM -Bugzilla `_ and preferably subscribe to the `llvm-bugs +Bugzilla `_ and preferably subscribe to the `llvm-bugs `_ email list to keep track of bugs and enhancements occurring in LLVM. We really appreciate people who are proactive at catching incoming bugs in their components and dealing with them @@ -261,7 +261,7 @@ the future that the change is responsible for. For example: * The changes should not cause performance or correctness regressions in code compiled by LLVM on all applicable targets. -* You are expected to address any `Bugzilla bugs `_ that +* You are expected to address any `Bugzilla bugs `_ that result from your change. We prefer for this to be handled before submission but understand that it isn't diff --git a/docs/Extensions.rst b/docs/Extensions.rst index 2b12123cdf6889ba18e0e5a976490281d65b7abc..14fea30204b4cb235a1175f6889daffc4687aa47 100644 --- a/docs/Extensions.rst +++ b/docs/Extensions.rst @@ -204,9 +204,49 @@ For example, the following code creates two sections named ``.text``. The unique number is not present in the resulting object at all. It is just used in the assembler to differentiate the sections. +The 'o' flag is mapped to SHF_LINK_ORDER. If it is present, a symbol +must be given that identifies the section to be placed in the +.sh_link. + +..
code-block:: gas + + .section .foo,"a",@progbits + .Ltmp: + .section .bar,"ao",@progbits,.Ltmp + +which is equivalent to just + +.. code-block:: gas + + .section .foo,"a",@progbits + .section .bar,"ao",@progbits,.foo + + Target Specific Behaviour ========================= +X86 +--- + +Relocations +^^^^^^^^^^^ + +``@ABS8`` can be applied to symbols which appear as immediate operands to +instructions that have an 8-bit immediate form for that operand. It causes +the assembler to use the 8-bit form and an 8-bit relocation (e.g. ``R_386_8`` +or ``R_X86_64_8``) for the symbol. + +For example: + +.. code-block:: gas + + cmpq $foo@ABS8, %rdi + +This causes the assembler to select the form of the 64-bit ``cmpq`` instruction +that takes an 8-bit immediate operand that is sign extended to 64 bits, as +opposed to ``cmpq $foo, %rdi`` which takes a 32-bit immediate operand. This +is also not the same as ``cmpb $foo, %dil``, which is an 8-bit comparison. + Windows on ARM -------------- diff --git a/docs/FaultMaps.rst b/docs/FaultMaps.rst index 4ecdd86d7693c4bb31b38630176200c6cd757389..d63ff5a84394808d50299b8b2785121b9e18bbc2 100644 --- a/docs/FaultMaps.rst +++ b/docs/FaultMaps.rst @@ -47,12 +47,18 @@ The format of this section is uint32 : NumFaultingPCs uint32 : Reserved (expected to be 0) FunctionFaultInfo[NumFaultingPCs] { - uint32 : FaultKind = FaultMaps::FaultingLoad (only legal value currently) + uint32 : FaultKind uint32 : FaultingPCOffset uint32 : HandlerPCOffset } } +FaultKind describes the reason for the expected fault. Currently three kinds +of faults are supported: + + 1. ``FaultMaps::FaultingLoad`` - fault due to load from memory. + 2. ``FaultMaps::FaultingLoadStore`` - fault due to instruction load and store. + 3. ``FaultMaps::FaultingStore`` - fault due to store to memory.
The ``ImplicitNullChecks`` pass =============================== diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst index 969976cbccc8f690088f6c6313829fb1aa9555f2..a88860310f642937a553d512bfa3f28e6b5af044 100644 --- a/docs/GettingStarted.rst +++ b/docs/GettingStarted.rst @@ -58,6 +58,12 @@ Here's the short story for getting up and running quickly with LLVM: * ``cd llvm/tools`` * ``svn co http://llvm.org/svn/llvm-project/lld/trunk lld`` +#. Checkout Polly Loop Optimizer **[Optional]**: + + * ``cd where-you-want-llvm-to-live`` + * ``cd llvm/tools`` + * ``svn co http://llvm.org/svn/llvm-project/polly/trunk polly`` + #. Checkout Compiler-RT (required to build the sanitizers) **[Optional]**: * ``cd where-you-want-llvm-to-live`` @@ -727,8 +733,8 @@ Or a combination of multiple projects: % mkdir clang-build && cd clang-build % cmake -GNinja ../llvm-project/llvm -DLLVM_ENABLE_PROJECTS="clang;libcxx;libcxxabi" -A helper script is provided in `llvm/utils/git-svn/git-llvm`. After you add it -to your path, you can push committed changes upstream with `git llvm push`. +A helper script is provided in ``llvm/utils/git-svn/git-llvm``. After you add it +to your path, you can push committed changes upstream with ``git llvm push``. .. code-block:: console @@ -737,10 +743,22 @@ to your path, you can push committed changes upstream with `git llvm push`. While this is using SVN under the hood, it does not require any interaction from you with git-svn. -After a few minutes, `git pull` should get back the changes as they were -committed. Note that a current limitation is that `git` does not directly record -file rename, and thus it is propagated to SVN as a combination of delete-add -instead of a file rename. +After a few minutes, ``git pull`` should get back the changes as they were +committed. Note that a current limitation is that ``git`` does not directly +record file rename, and thus it is propagated to SVN as a combination of +delete-add instead of a file rename. 
+ +The SVN revision of each monorepo commit can be found in the commit notes. git +does not fetch notes by default. The following commands will fetch the notes and +configure git to fetch future notes. Use ``git notes show $commit`` to look up +the SVN revision of a git commit. The notes show up in ``git log``, and searching +the log is currently the recommended way to look up the git commit for a given +SVN revision. + +.. code-block:: console + + % git config --add remote.origin.fetch +refs/notes/commits:refs/notes/commits + % git fetch If you are using `arc` to interact with Phabricator, you need to manually put it at the root of the checkout: @@ -799,7 +817,8 @@ used by people developing LLVM. +-------------------------+----------------------------------------------------+ | LLVM_ENABLE_SPHINX | Build sphinx-based documentation from the source | | | code. This is disabled by default because it is | -| | slow and generates a lot of output. | +| | slow and generates a lot of output. Sphinx version | +| | 1.5 or later recommended. | +-------------------------+----------------------------------------------------+ | LLVM_BUILD_LLVM_DYLIB | Generate libLLVM.so. This library contains a | | | default set of LLVM components that can be | @@ -1144,7 +1163,7 @@ the `Command Guide `_. ``llc`` ``llc`` is the LLVM backend compiler, which translates LLVM bitcode to a - native code assembly file or to C code (with the ``-march=c`` option). + native code assembly file. ``opt`` diff --git a/docs/HowToAddABuilder.rst b/docs/HowToAddABuilder.rst index 9e06a3276470b9a3a36996a683d68f9b784a91a6..fcc2293de052e3b9ad455d33fd29ed00561533d2 100644 --- a/docs/HowToAddABuilder.rst +++ b/docs/HowToAddABuilder.rst @@ -6,9 +6,19 @@ Introduction ============ This document contains information about adding a build configuration and -buildslave to private slave builder to LLVM Buildbot Infrastructure -``_. +buildslave to private slave builder to LLVM Buildbot Infrastructure.
+Buildmasters +============ + +There are two buildmasters running. + +* The main buildmaster at ``_. All builders attached + to this machine will notify commit authors every time they break the build. +* The staging buildbot at ``_. All builders attached + to this machine will be completely silent by default when the build is broken. + Builders for experimental backends should generally be attached to this + buildmaster. Steps To Add Builder To LLVM Buildbot ===================================== @@ -73,6 +83,11 @@ Here are the steps you can follow to do so: * slaves are added to ``buildbot/osuosl/master/config/slaves.py`` * builders are added to ``buildbot/osuosl/master/config/builders.py`` + It is possible to whitelist email addresses to unconditionally receive notifications + on build failure; for this you'll need to add an ``InformativeMailNotifier`` to + ``buildbot/osuosl/master/config/status.py``. This is particularly useful for the + staging buildmaster which is silent otherwise. + #. Send the buildslave access name and the access password directly to `Galina Kistanova `_, and wait till she will let you know that your changes are applied and buildmaster is diff --git a/docs/HowToSubmitABug.rst b/docs/HowToSubmitABug.rst index 9f997d2757dd967854599329ca30c73935f62c7b..25cb2c8c80d3681495c6ae789d969dea81443d37 100644 --- a/docs/HowToSubmitABug.rst +++ b/docs/HowToSubmitABug.rst @@ -19,7 +19,7 @@ section to narrow down the bug so that the person who fixes it will be able to find the problem more easily. Once you have a reduced test-case, go to `the LLVM Bug Tracking System -`_ and fill out the form with the +`_ and fill out the form with the necessary details (note that you don't need to pick a category, just use the "new-bugs" category if you're not sure). 
The bug description should contain the following information: diff --git a/docs/HowToUseAttributes.rst b/docs/HowToUseAttributes.rst index 66c44c01f631cb633cd22cb5a4919f5262ee2650..1d05e238587406f4d2aca5729ddc70fc4de5ad79 100644 --- a/docs/HowToUseAttributes.rst +++ b/docs/HowToUseAttributes.rst @@ -38,36 +38,35 @@ Because attributes are no longer represented as a bit mask, you will need to convert any code which does treat them as a bit mask to use the new query methods on the Attribute class. -``AttributeSet`` -================ +``AttributeList`` +================= -The ``AttributeSet`` class replaces the old ``AttributeList`` class. The -``AttributeSet`` stores a collection of Attribute objects for each kind of -object that may have an attribute associated with it: the function as a -whole, the return type, or the function's parameters. A function's attributes -are at index ``AttributeSet::FunctionIndex``; the return type's attributes are -at index ``AttributeSet::ReturnIndex``; and the function's parameters' -attributes are at indices 1, ..., n (where 'n' is the number of parameters). -Most methods on the ``AttributeSet`` class take an index parameter. +The ``AttributeList`` stores a collection of Attribute objects for each kind of +object that may have an attribute associated with it: the function as a whole, +the return type, or the function's parameters. A function's attributes are at +index ``AttributeList::FunctionIndex``; the return type's attributes are at +index ``AttributeList::ReturnIndex``; and the function's parameters' attributes +are at indices 1, ..., n (where 'n' is the number of parameters). Most methods +on the ``AttributeList`` class take an index parameter. -An ``AttributeSet`` is also a uniqued and immutable object. You create an -``AttributeSet`` through the ``AttributeSet::get`` methods. You can add and -remove attributes, which result in the creation of a new ``AttributeSet``. +An ``AttributeList`` is also a uniqued and immutable object. 
You create an +``AttributeList`` through the ``AttributeList::get`` methods. You can add and +remove attributes, which result in the creation of a new ``AttributeList``. -An ``AttributeSet`` object is designed to be passed around by value. +An ``AttributeList`` object is designed to be passed around by value. -Note: It is advised that you do *not* use the ``AttributeSet`` "introspection" +Note: It is advised that you do *not* use the ``AttributeList`` "introspection" methods (e.g. ``Raw``, ``getRawPointer``, etc.). These methods break encapsulation, and may be removed in a future release (i.e. LLVM 4.0). ``AttrBuilder`` =============== -Lastly, we have a "builder" class to help create the ``AttributeSet`` object +Lastly, we have a "builder" class to help create the ``AttributeList`` object without having to create several different intermediate uniqued -``AttributeSet`` objects. The ``AttrBuilder`` class allows you to add and +``AttributeList`` objects. The ``AttrBuilder`` class allows you to add and remove attributes at will. The attributes won't be uniqued until you call the -appropriate ``AttributeSet::get`` method. +appropriate ``AttributeList::get`` method. An ``AttrBuilder`` object is *not* designed to be passed around by value. It should be passed by reference. diff --git a/docs/LLVMBuild.rst b/docs/LLVMBuild.rst index a93dcf644084d2eb9ac4af5ece264865a99f9e81..622780aee3124f806e30968119bf726363fdbfef 100644 --- a/docs/LLVMBuild.rst +++ b/docs/LLVMBuild.rst @@ -54,7 +54,7 @@ handled by another build system (See: :doc:`CMake `). The build system implementation will load the relevant contents of the LLVMBuild files and use that to drive the actual project build. Typically, the build system will only need to load this information at -"configure" time, and use it to generative native information. Build +"configure" time, and use it to generate native information. 
Build systems will also handle automatically reconfiguring their information when the contents of the ``LLVMBuild.txt`` files change. diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 47d96290e6eba52135e2b19b89a8752ece9bbca5..363847af0a8a877311fe5bae506076d906c827cd 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -195,7 +195,7 @@ linkage: ``private`` Global values with "``private``" linkage are only directly accessible by objects in the current module. In particular, linking - code into a module with an private global value may cause the + code into a module with a private global value may cause the private to be renamed as necessary to avoid collisions. Because the symbol is private to the module, all references can be updated. This doesn't show up in any symbol table in the object file. @@ -1474,8 +1474,10 @@ example: any mutable state (e.g. memory, control registers, etc) visible to caller functions. It does not write through any pointer arguments (including ``byval`` arguments) and never changes any state visible - to callers. This means that it cannot unwind exceptions by calling - the ``C++`` exception throwing methods. + to callers. This means while it cannot unwind exceptions by calling + the ``C++`` exception throwing methods (since they write to memory), there may + be non-``C++`` mechanisms that throw exceptions without writing to LLVM + visible memory. On an argument, this attribute indicates that the function does not dereference that pointer argument, even though it may read or write the @@ -1487,9 +1489,10 @@ example: caller functions. It may dereference pointer arguments and read state that may be set in the caller. A readonly function always returns the same value (or unwinds an exception identically) when - called with the same set of arguments and global state. It cannot - unwind an exception by calling the ``C++`` exception throwing - methods. + called with the same set of arguments and global state. 
This means while it + cannot unwind exceptions by calling the ``C++`` exception throwing methods + (since they write to memory), there may be non-``C++`` mechanisms that throw + exceptions without writing to LLVM visible memory. On an argument, this attribute indicates that the function does not write through this pointer argument, even though it may write to the memory that @@ -1809,6 +1812,9 @@ as follows: must be a multiple of 8-bits. If omitted, the natural stack alignment defaults to "unspecified", which does not prevent any alignment promotions. +``A
`` + Specifies the address space of objects created by '``alloca``'. + Defaults to the default address space of 0. ``p[n]:<size>:<abi>:<pref>`` This specifies the *size* of a pointer and its ``<abi>`` and ``<pref>``\erred alignments for address space ``n``. All sizes are in @@ -2191,6 +2197,10 @@ otherwise unsafe floating point transformations. Allow Reciprocal - Allow optimizations to use the reciprocal of an argument rather than perform division. +``contract`` + Allow floating-point contraction (e.g. fusing a multiply followed by an + addition into a fused multiply-and-add). + ``fast`` Fast - Allow algebraically equivalent transformations that may dramatically change results in floating point (e.g. reassociate). This @@ -3199,6 +3209,22 @@ resulting assembly string is parsed by LLVM's integrated assembler unless it is disabled -- even when emitting a ``.s`` file -- and thus must contain assembly syntax known to LLVM. +LLVM also supports a few more substitutions useful for writing inline assembly: + +- ``${:uid}``: Expands to a decimal integer unique to this inline assembly blob. + This substitution is useful when declaring a local label. Many standard + compiler optimizations, such as inlining, may duplicate an inline asm blob. + Adding a blob-unique identifier ensures that the two labels will not conflict + during assembly. This is used to implement `GCC's %= special format + string `_. +- ``${:comment}``: Expands to the comment character of the current target's + assembly dialect. This is usually ``#``, but many targets use other strings, + such as ``;``, ``//``, or ``!``. +- ``${:private}``: Expands to the assembler private label prefix. Labels with + this prefix will not appear in the symbol table of the assembled object. + Typically the prefix is ``L``, but targets may use other strings. ``.L`` is + relatively popular. + LLVM's support for inline asm is modeled closely on the requirements of Clang's GCC-compatible inline-asm support.
Thus, the feature-set and the constraint and modifier codes listed here are similar or identical to those in GCC's inline asm @@ -3987,7 +4013,9 @@ DICompileUnit ``retainedTypes:``, ``subprograms:``, ``globals:``, ``imports:`` and ``macros:`` fields are tuples containing the debug info to be emitted along with the compile unit, regardless of code optimizations (some nodes are only emitted if there are -references to them from instructions). +references to them from instructions). The ``debugInfoForProfiling:`` field is a +boolean indicating whether or not line-table discriminators are updated to +provide more-accurate debug info for profiling results. .. code-block:: text @@ -4345,24 +4373,42 @@ parameter, and it will be included in the ``variables:`` field of its DIExpression """""""""""" -``DIExpression`` nodes represent DWARF expression sequences. They are used in -:ref:`debug intrinsics` (such as ``llvm.dbg.declare``) to -describe how the referenced LLVM variable relates to the source language -variable. +``DIExpression`` nodes represent expressions that are inspired by the DWARF +expression language. They are used in :ref:`debug intrinsics` +(such as ``llvm.dbg.declare`` and ``llvm.dbg.value``) to describe how the +referenced LLVM variable relates to the source language variable. The current supported vocabulary is limited: - ``DW_OP_deref`` dereferences the working expression. - ``DW_OP_plus, 93`` adds ``93`` to the working expression. -- ``DW_OP_bit_piece, 16, 8`` specifies the offset and size (``16`` and ``8`` - here, respectively) of the variable piece from the working expression. +- ``DW_OP_LLVM_fragment, 16, 8`` specifies the offset and size (``16`` and ``8`` + here, respectively) of the variable fragment from the working expression. Note + that contrary to DW_OP_bit_piece, the offset is describing the location + within the described source variable. +- ``DW_OP_swap`` swaps the top two stack entries.
+- ``DW_OP_xderef`` provides extended dereference mechanism. The entry at the top + of the stack is treated as an address. The second stack entry is treated as an + address space identifier. +- ``DW_OP_stack_value`` marks a constant value. + +DIExpression nodes that contain a ``DW_OP_stack_value`` operator are standalone +location descriptions that describe constant values. This form is used to +describe global constants that have been optimized away. All other expressions +are modifiers to another location: A debug intrinsic ties a location and a +DIExpression together. Contrary to DWARF expressions, a DIExpression always +describes the *value* of a source variable and never its *address*. In DWARF +terminology, a DIExpression can always be considered an implicit location +description regardless whether it contains a ``DW_OP_stack_value`` or not. .. code-block:: text !0 = !DIExpression(DW_OP_deref) !1 = !DIExpression(DW_OP_plus, 3) !2 = !DIExpression(DW_OP_bit_piece, 3, 7) - !3 = !DIExpression(DW_OP_deref, DW_OP_plus, 3, DW_OP_bit_piece, 3, 7) + !3 = !DIExpression(DW_OP_deref, DW_OP_plus, 3, DW_OP_LLVM_fragment, 3, 7) + !4 = !DIExpression(DW_OP_constu, 2, DW_OP_swap, DW_OP_xderef) + !5 = !DIExpression(DW_OP_constu, 42, DW_OP_stack_value) DIObjCProperty """""""""""""" @@ -4415,37 +4461,156 @@ appear in the included source file. ^^^^^^^^^^^^^^^^^^^ In LLVM IR, memory does not have types, so LLVM's own type system is not -suitable for doing TBAA. Instead, metadata is added to the IR to -describe a type system of a higher level language. This can be used to -implement typical C/C++ TBAA, but it can also be used to implement -custom alias analysis behavior for other languages. +suitable for doing type based alias analysis (TBAA). Instead, metadata is +added to the IR to describe a type system of a higher level language. 
This +can be used to implement C/C++ strict type aliasing rules, but it can also +be used to implement custom alias analysis behavior for other languages. + +This description of LLVM's TBAA system is broken into two parts: +:ref:`Semantics` talks about high level issues, and +:ref:`Representation` talks about the metadata +encoding of various entities. + +It is always possible to trace any TBAA node to a "root" TBAA node (details +in the :ref:`Representation` section). TBAA +nodes with different roots have an unknown aliasing relationship, and LLVM +conservatively infers ``MayAlias`` between them. The rules mentioned in +this section only pertain to TBAA nodes living under the same root. + +.. _tbaa_node_semantics: + +Semantics +""""""""" + +The TBAA metadata system, referred to as "struct path TBAA" (not to be +confused with ``tbaa.struct``), consists of the following high level +concepts: *Type Descriptors*, further subdivided into scalar type +descriptors and struct type descriptors; and *Access Tags*. + +**Type descriptors** describe the type system of the higher level language +being compiled. **Scalar type descriptors** describe types that do not +contain other types. Each scalar type has a parent type, which must also +be a scalar type or the TBAA root. Via this parent relation, scalar types +within a TBAA root form a tree. **Struct type descriptors** denote types +that contain a sequence of other type descriptors, at known offsets. These +contained type descriptors can either be struct type descriptors themselves +or scalar type descriptors. + +**Access tags** are metadata nodes attached to load and store instructions. +Access tags use type descriptors to describe the *location* being accessed +in terms of the type system of the higher level language. Access tags are +tuples consisting of a base type, an access type and an offset. 
The base +type is a scalar type descriptor or a struct type descriptor, the access +type is a scalar type descriptor, and the offset is a constant integer. + +The access tag ``(BaseTy, AccessTy, Offset)`` can describe one of two +things: + + * If ``BaseTy`` is a struct type, the tag describes a memory access (load + or store) of a value of type ``AccessTy`` contained in the struct type + ``BaseTy`` at offset ``Offset``. + + * If ``BaseTy`` is a scalar type, ``Offset`` must be 0 and ``BaseTy`` and + ``AccessTy`` must be the same; and the access tag describes a scalar + access with scalar type ``AccessTy``. + +We first define an ``ImmediateParent`` relation on ``(BaseTy, Offset)`` +tuples this way: + + * If ``BaseTy`` is a scalar type then ``ImmediateParent(BaseTy, 0)`` is + ``(ParentTy, 0)`` where ``ParentTy`` is the parent of the scalar type as + described in the TBAA metadata. ``ImmediateParent(BaseTy, Offset)`` is + undefined if ``Offset`` is non-zero. + + * If ``BaseTy`` is a struct type then ``ImmediateParent(BaseTy, Offset)`` + is ``(NewTy, NewOffset)`` where ``NewTy`` is the type contained in + ``BaseTy`` at offset ``Offset`` and ``NewOffset`` is ``Offset`` adjusted + to be relative within that inner type. + +A memory access with an access tag ``(BaseTy1, AccessTy1, Offset1)`` +aliases a memory access with an access tag ``(BaseTy2, AccessTy2, +Offset2)`` if either ``(BaseTy1, Offset1)`` is reachable from ``(BaseTy2, +Offset2)`` via the ``ImmediateParent`` relation or vice versa. + +As a concrete example, the type descriptor graph for the following program -The current metadata format is very simple. TBAA metadata nodes have up -to three fields, e.g.: +.. code-block:: c -..
code-block:: llvm + struct Inner { + int i; // offset 0 + float f; // offset 4 + }; + + struct Outer { + float f; // offset 0 + double d; // offset 4 + struct Inner inner_a; // offset 12 + }; + + void f(struct Outer* outer, struct Inner* inner, float* f, int* i, char* c) { + outer->f = 0; // tag0: (OuterStructTy, FloatScalarTy, 0) + outer->inner_a.i = 0; // tag1: (OuterStructTy, IntScalarTy, 12) + outer->inner_a.f = 0.0; // tag2: (OuterStructTy, FloatScalarTy, 16) + *f = 0.0; // tag3: (FloatScalarTy, FloatScalarTy, 0) + } - !0 = !{ !"an example type tree" } - !1 = !{ !"int", !0 } - !2 = !{ !"float", !0 } - !3 = !{ !"const float", !2, i64 1 } - -The first field is an identity field. It can be any value, usually a -metadata string, which uniquely identifies the type. The most important -name in the tree is the name of the root node. Two trees with different -root node names are entirely disjoint, even if they have leaves with -common names. - -The second field identifies the type's parent node in the tree, or is -null or omitted for a root node. A type is considered to alias all of -its descendants and all of its ancestors in the tree. Also, a type is -considered to alias all types in other trees, so that bitcode produced -from multiple front-ends is handled conservatively. - -If the third field is present, it's an integer which if equal to 1 -indicates that the type is "constant" (meaning +is (note that in C and C++, ``char`` can be used to access any arbitrary +type): + +.. code-block:: text + + Root = "TBAA Root" + CharScalarTy = ("char", Root, 0) + FloatScalarTy = ("float", CharScalarTy, 0) + DoubleScalarTy = ("double", CharScalarTy, 0) + IntScalarTy = ("int", CharScalarTy, 0) + InnerStructTy = {"Inner", (IntScalarTy, 0), (FloatScalarTy, 4)} + OuterStructTy = {"Outer", (FloatScalarTy, 0), (DoubleScalarTy, 4), + (InnerStructTy, 12)} + + +with (e.g.)
``ImmediateParent(OuterStructTy, 12)`` = ``(InnerStructTy, +0)``, ``ImmediateParent(InnerStructTy, 0)`` = ``(IntScalarTy, 0)``, and +``ImmediateParent(IntScalarTy, 0)`` = ``(CharScalarTy, 0)``. + +.. _tbaa_node_representation: + +Representation +"""""""""""""" + +The root node of a TBAA type hierarchy is an ``MDNode`` with 0 operands or +with exactly one ``MDString`` operand. + +Scalar type descriptors are represented as ``MDNode`` s with two +operands. The first operand is an ``MDString`` denoting the name of the +scalar type. LLVM does not assign meaning to the value of this operand, it +only cares about it being an ``MDString``. The second operand is an +``MDNode`` which points to the parent for said scalar type descriptor, +which is either another scalar type descriptor or the TBAA root. Scalar +type descriptors can have an optional third argument, but that must be the +constant integer zero. + +Struct type descriptors are represented as ``MDNode`` s with an odd number +of operands greater than 1. The first operand is an ``MDString`` denoting +the name of the struct type. Like in scalar type descriptors the actual +value of this name operand is irrelevant to LLVM. After the name operand, +the struct type descriptors have a sequence of alternating ``MDNode`` and +``ConstantInt`` operands. With N starting from 1, the 2N - 1 th operand, +an ``MDNode``, denotes a contained field, and the 2N th operand, a +``ConstantInt``, is the offset of the said contained field. The offsets +must be in non-decreasing order. + +Access tags are represented as ``MDNode`` s with either 3 or 4 operands. +The first operand is an ``MDNode`` pointing to the node representing the +base type. The second operand is an ``MDNode`` pointing to the node +representing the access type. The third operand is a ``ConstantInt`` that +states the offset of the access. If a fourth field is present, it must be +a ``ConstantInt`` valued at 0 or 1.
If it is 1 then the access tag states +that the location being accessed is "constant" (meaning ``pointsToConstantMemory`` should return true; see `other useful -AliasAnalysis methods `_). +AliasAnalysis methods `_). The TBAA root of +the access type and the base type of an access tag must be the same, and +that is the TBAA root of the access tag. '``tbaa.struct``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -4956,11 +5121,48 @@ Examples: !0 = !{!"magic ptr"} !1 = !{!"other ptr"} +The invariant.group metadata must be dropped when replacing one pointer by +another based on aliasing information. This is because invariant.group is tied +to the SSA value of the pointer operand. + +.. code-block:: llvm + + %v = load i8, i8* %x, !invariant.group !0 + ; if %x mustalias %y then we can replace the above instruction with + %v = load i8, i8* %y + + '``type``' Metadata ^^^^^^^^^^^^^^^^^^^ See :doc:`TypeMetadata`. +'``associated``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``associated`` metadata may be attached to a global object +declaration with a single argument that references another global object. + +This metadata prevents discarding of the global object in linker GC +unless the referenced object is also discarded. The linker support for +this feature is spotty. For best compatibility, globals carrying this +metadata may also: + +- Be in a comdat with the referenced global. +- Be in @llvm.compiler.used. +- Have an explicit section with a name which is a valid C identifier. + +It does not have any effect on non-ELF targets. + +Example: + +.. code-block:: llvm + + $a = comdat any + @a = global i32 1, comdat $a + @b = internal global i32 2, comdat $a, section "abc", !associated !0 + !0 = !{i32* @a} + Module Flags Metadata ===================== @@ -6184,7 +6386,9 @@ The value produced is the unsigned integer quotient of the two operands. Note that unsigned integer division and signed integer division are distinct operations; for signed integer division, use '``sdiv``'. 
-Division by zero leads to undefined behavior. +Division by zero is undefined behavior. For vectors, if any element +of the divisor is zero, the operation has undefined behavior. + If the ``exact`` keyword is present, the result value of the ``udiv`` is a :ref:`poison value ` if %op1 is not a multiple of %op2 (as @@ -6229,9 +6433,10 @@ rounded towards zero. Note that signed integer division and unsigned integer division are distinct operations; for unsigned integer division, use '``udiv``'. -Division by zero leads to undefined behavior. Overflow also leads to -undefined behavior; this is a rare case, but can occur, for example, by -doing a 32-bit division of -2147483648 by -1. +Division by zero is undefined behavior. For vectors, if any element +of the divisor is zero, the operation has undefined behavior. +Overflow also leads to undefined behavior; this is a rare case, but can +occur, for example, by doing a 32-bit division of -2147483648 by -1. If the ``exact`` keyword is present, the result value of the ``sdiv`` is a :ref:`poison value ` if the result would be rounded. @@ -6314,8 +6519,10 @@ remainder. Note that unsigned integer remainder and signed integer remainder are distinct operations; for signed integer remainder, use '``srem``'. - -Taking the remainder of a division by zero leads to undefined behavior. + +Taking the remainder of a division by zero is undefined behavior. +For vectors, if any element of the divisor is zero, the operation has +undefined behavior. Example: """""""" @@ -6365,7 +6572,9 @@ operation `_. Note that signed integer remainder and unsigned integer remainder are distinct operations; for unsigned integer remainder, use '``urem``'. -Taking the remainder of a division by zero leads to undefined behavior. +Taking the remainder of a division by zero is undefined behavior. +For vectors, if any element of the divisor is zero, the operation has +undefined behavior. 
Overflow also leads to undefined behavior; this is a rare case, but can occur, for example, by taking the remainder of a 32-bit division of -2147483648 by -1. (The remainder doesn't actually overflow, but this @@ -6997,7 +7206,7 @@ Syntax: :: - = alloca [inalloca] [, ] [, align ] ; yields type*:result + = alloca [inalloca] [, ] [, align ] [, addrspace()] ; yields type addrspace(num)*:result Overview: """"""""" @@ -7005,7 +7214,7 @@ Overview: The '``alloca``' instruction allocates memory on the stack frame of the currently executing function, to be automatically released when this function returns to its caller. The object is always allocated in the -generic address space (address space zero). +address space for allocas indicated in the datalayout. Arguments: """""""""" @@ -7594,8 +7803,10 @@ offsets implied by the indices to the base address with infinitely precise signed arithmetic are not an *in bounds* address of that allocated object. The *in bounds* addresses for an allocated object are all the addresses that point into the object, plus the address one byte -past the end. In cases where the base is a vector of pointers the -``inbounds`` keyword applies to each of the computations element-wise. +past the end. The only *in bounds* address for a null pointer in the +default address-space is the null pointer itself. In cases where the +base is a vector of pointers the ``inbounds`` keyword applies to each +of the computations element-wise. If the ``inbounds`` keyword is not present, the offsets are added to the base address with silently-wrapping two's complement arithmetic. If the @@ -9598,7 +9809,7 @@ Semantics: compile-time-known constant value. The return value type of :ref:`llvm.get.dynamic.area.offset ` - must match the target's generic address space's (address space 0) pointer type. + must match the target's default address space's (address space 0) pointer type. 
'``llvm.prefetch``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10075,21 +10286,20 @@ all types however. Overview: """"""""" -The '``llvm.sqrt``' intrinsics return the sqrt of the specified operand, +The '``llvm.sqrt``' intrinsics return the square root of the specified value, returning the same value as the libm '``sqrt``' functions would, but without trapping or setting ``errno``. Arguments: """""""""" -The argument and return value are floating point numbers of the same -type. +The argument and return value are floating point numbers of the same type. Semantics: """""""""" -This function returns the sqrt of the specified operand if it is a -nonnegative floating point number. +This function returns the square root of the operand if it is a nonnegative +floating point number. '``llvm.powi.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10155,8 +10365,7 @@ The '``llvm.sin.*``' intrinsics return the sine of the operand. Arguments: """""""""" -The argument and return value are floating point numbers of the same -type. +The argument and return value are floating point numbers of the same type. Semantics: """""""""" @@ -10191,8 +10400,7 @@ The '``llvm.cos.*``' intrinsics return the cosine of the operand. Arguments: """""""""" -The argument and return value are floating point numbers of the same -type. +The argument and return value are floating point numbers of the same type. Semantics: """""""""" @@ -10259,13 +10467,13 @@ all types however. Overview: """"""""" -The '``llvm.exp.*``' intrinsics perform the exp function. +The '``llvm.exp.*``' intrinsics compute the base-e exponential of the specified +value. Arguments: """""""""" -The argument and return value are floating point numbers of the same -type. +The argument and return value are floating point numbers of the same type. Semantics: """""""""" @@ -10294,13 +10502,13 @@ all types however. Overview: """"""""" -The '``llvm.exp2.*``' intrinsics perform the exp2 function. 
+The '``llvm.exp2.*``' intrinsics compute the base-2 exponential of the +specified value. Arguments: """""""""" -The argument and return value are floating point numbers of the same -type. +The argument and return value are floating point numbers of the same type. Semantics: """""""""" @@ -10329,13 +10537,13 @@ all types however. Overview: """"""""" -The '``llvm.log.*``' intrinsics perform the log function. +The '``llvm.log.*``' intrinsics compute the base-e logarithm of the specified +value. Arguments: """""""""" -The argument and return value are floating point numbers of the same -type. +The argument and return value are floating point numbers of the same type. Semantics: """""""""" @@ -10364,13 +10572,13 @@ all types however. Overview: """"""""" -The '``llvm.log10.*``' intrinsics perform the log10 function. +The '``llvm.log10.*``' intrinsics compute the base-10 logarithm of the +specified value. Arguments: """""""""" -The argument and return value are floating point numbers of the same -type. +The argument and return value are floating point numbers of the same type. Semantics: """""""""" @@ -10399,13 +10607,13 @@ all types however. Overview: """"""""" -The '``llvm.log2.*``' intrinsics perform the log2 function. +The '``llvm.log2.*``' intrinsics compute the base-2 logarithm of the specified +value. Arguments: """""""""" -The argument and return value are floating point numbers of the same -type. +The argument and return value are floating point numbers of the same type. Semantics: """""""""" @@ -12579,8 +12787,8 @@ Syntax: :: - declare i32 @llvm.objectsize.i32(i8* , i1 ) - declare i64 @llvm.objectsize.i64(i8* , i1 ) + declare i32 @llvm.objectsize.i32(i8* , i1 , i1 ) + declare i64 @llvm.objectsize.i64(i8* , i1 , i1 ) Overview: """"""""" @@ -12595,11 +12803,16 @@ other object. Arguments: """""""""" -The ``llvm.objectsize`` intrinsic takes two arguments. The first -argument is a pointer to or into the ``object``. 
The second argument is -a boolean and determines whether ``llvm.objectsize`` returns 0 (if true) -or -1 (if false) when the object size is unknown. The second argument -only accepts constants. +The ``llvm.objectsize`` intrinsic takes three arguments. The first argument is +a pointer to or into the ``object``. The second argument determines whether +``llvm.objectsize`` returns 0 (if true) or -1 (if false) when the object size +is unknown. The third argument controls how ``llvm.objectsize`` acts when +``null`` is used as its pointer argument. If it's true and the pointer is in +address space 0, ``null`` is treated as an opaque value with an unknown number +of bytes. Otherwise, ``llvm.objectsize`` reports 0 bytes available when given +``null``. + +The second and third arguments only accept constants. Semantics: """""""""" @@ -12684,6 +12897,33 @@ sufficient overall improvement in code quality. For this reason, that the optimizer can otherwise deduce or facts that are of little use to the optimizer. +.. _int_ssa_copy: + +'``llvm.ssa_copy``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare type @llvm.ssa_copy(type %operand) returned(1) readnone + +Arguments: +"""""""""" + +The first argument is an operand which is used as the returned value. + +Overview: +"""""""""" + +The ``llvm.ssa_copy`` intrinsic can be used to attach information to +operations by copying them and giving them new names. For example, +the PredicateInfo utility uses it to build Extended SSA form, and +attach various forms of information to operands that dominate specific +uses. It is not meant for general use, only for building temporary +renaming forms that require value splits at certain points. + .. 
_type.test: '``llvm.type.test``' Intrinsic diff --git a/docs/Lexicon.rst b/docs/Lexicon.rst index de929bec1b0e984b2ca2910e9875892b4ef3051e..5d16091e27e5ceed8c7f9c9b4a525b8ba09cc28a 100644 --- a/docs/Lexicon.rst +++ b/docs/Lexicon.rst @@ -182,7 +182,7 @@ P **PR** Problem report. A bug filed on `the LLVM Bug Tracking System - `_. + `_. **PRE** Partial Redundancy Elimination diff --git a/docs/OptBisect.rst b/docs/OptBisect.rst index e9f1c2541c9c0a94d9d44be1ea3e0ae5da8c6655..5a216d419a6414fd4c12ed6246440bc1a2ba207a 100644 --- a/docs/OptBisect.rst +++ b/docs/OptBisect.rst @@ -60,11 +60,14 @@ like this: clang -O2 -mllvm -opt-bisect-limit=256 my_file.c The -opt-bisect-limit option may also be applied to link-time optimizations by -using a prefix to indicate that this is a plug-in option for the linker. The +using a prefix to indicate that this is a plug-in option for the linker. The following syntax will set a bisect limit for LTO transformations: :: + # When using lld, or ld64 (macOS) + clang -flto -Wl,-mllvm,-opt-bisect-limit=256 my_file.o my_other_file.o + # When using Gold clang -flto -Wl,-plugin-opt,-opt-bisect-limit=256 my_file.o my_other_file.o LTO passes are run by a library instance invoked by the linker. Therefore any @@ -186,12 +189,5 @@ Adding Finer Granularity Once the pass in which an incorrect transformation is performed has been determined, it may be useful to perform further analysis in order to determine -which specific transformation is causing the problem. Ideally all passes -would be instrumented to allow skipping of individual transformations. This -functionality is available through the OptBisect object but it is impractical -to proactively instrument every existing pass. It is hoped that as developers -find that they need a pass to be instrumented they will add the instrumentation -and contribute it back to the LLVM source base. - -Helper functions will be added to simplify this level of instrumentation, but -this work is not yet completed. 
For more information, contact Andy Kaylor. +which specific transformation is causing the problem. Debug counters +can be used for this purpose. diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst index 1c96046bf40a7b41c266626cd5abcbd4fcace7d5..4fb67e1e6d5f8f4e091b6719526f6eafe00f0183 100644 --- a/docs/ProgrammersManual.rst +++ b/docs/ProgrammersManual.rst @@ -32,7 +32,7 @@ to know when working in the LLVM infrastructure, and the second describes the Core LLVM classes. In the future this manual will be extended with information describing how to use extension libraries, such as dominator information, CFG traversal routines, and useful utilities like the ``InstVisitor`` (`doxygen -`__) template. +`__) template. .. _general: @@ -108,7 +108,7 @@ they don't have some drawbacks (primarily stemming from the fact that ``dynamic_cast<>`` only works on classes that have a v-table). Because they are used so often, you must know what they do and how they work. All of these templates are defined in the ``llvm/Support/Casting.h`` (`doxygen -`__) file (note that you very +`__) file (note that you very rarely have to include this file directly). ``isa<>``: @@ -225,7 +225,7 @@ and clients can call it using any one of: Similarly, APIs which need to return a string may return a ``StringRef`` instance, which can be used directly or converted to an ``std::string`` using the ``str`` member function. See ``llvm/ADT/StringRef.h`` (`doxygen -`__) for more +`__) for more information. You should rarely use the ``StringRef`` class directly, because it contains @@ -482,7 +482,7 @@ that inherits from the ErrorInfo utility, E.g.: } }; - char FileExists::ID; // This should be declared in the C++ file. + char BadFileFormat::ID; // This should be declared in the C++ file. Error printFormattedFile(StringRef Path) { if () @@ -564,18 +564,18 @@ the boolean conversion operator): .. 
code-block:: c++ - if (auto Err = canFail(...)) + if (auto Err = mayFail(...)) return Err; // Failure value - move error to caller. // Safe to continue: Err was checked. -In contrast, the following code will always cause an abort, even if ``canFail`` +In contrast, the following code will always cause an abort, even if ``mayFail`` returns a success value: .. code-block:: c++ - canFail(); - // Program will always abort here, even if canFail() returns Success, since + mayFail(); + // Program will always abort here, even if mayFail() returns Success, since // the value is not checked. Failure values are considered checked once a handler for the error type has @@ -633,6 +633,12 @@ exiting with an error code, the :ref:`ExitOnError ` utility may be a better choice than handleErrors, as it simplifies control flow when calling fallible functions. +In situations where it is known that a particular call to a fallible function +will always succeed (for example, a call to a function that can only fail on a +subset of inputs with an input that is known to be safe) the +:ref:`cantFail ` functions can be used to remove the error type, +simplifying control flow. + StringError """"""""""" @@ -765,6 +771,43 @@ mapping can also be supplied from ``Error`` values to exit codes using the Use ``ExitOnError`` in your tool code where possible as it can greatly improve readability. +.. _err_cantfail: + +Using cantFail to simplify safe callsites +""""""""""""""""""""""""""""""""""""""""" + +Some functions may only fail for a subset of their inputs. For such functions +call-sites using known-safe inputs can assume that the result will be a success +value. + +The cantFail functions encapsulate this by wrapping an assertion that their +argument is a success value and, in the case of Expected, unwrapping the +T value from the Expected argument: + +.. 
code-block:: c++ + + Error mayFail(int X); + Expected mayFail2(int X); + + void foo() { + cantFail(mayFail(KnownSafeValue)); + int Y = cantFail(mayFail2(KnownSafeValue)); + ... + } + +Like the ExitOnError utility, cantFail simplifies control flow. Their treatment +of error cases is very different however: Where ExitOnError is guaranteed to +terminate the program on an error input, cantFail simply asserts that the result +is success. In debug builds this will result in an assertion failure if an error +is encountered. In release builds the behavior of cantFail for failure values is +undefined. As such, care must be taken in the use of cantFail: clients must be +certain that a cantFail wrapped call really can not fail under any +circumstances. + +Use of the cantFail functions should be rare in library code, but they are +likely to be of more use in tool and unit-test code where inputs and/or +mocked-up classes or functions may be known to be safe. + Fallible constructors """"""""""""""""""""" @@ -931,7 +974,7 @@ The ``function_ref`` class template ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``function_ref`` -(`doxygen `__) class +(`doxygen `__) class template represents a reference to a callable object, templated over the type of the callable. This is a good choice for passing a callback to a function, if you don't need to hold onto the callback after the function returns. In this @@ -981,7 +1024,7 @@ you don't want them to always be noisy. A standard compromise is to comment them out, allowing you to enable them if you need them in the future. The ``llvm/Support/Debug.h`` (`doxygen -`__) file provides a macro named +`__) file provides a macro named ``DEBUG()`` that is a much nicer solution to this problem.
Basically, you can put arbitrary code into the argument of the ``DEBUG`` macro, and it is only executed if '``opt``' (or any other tool) is run with the '``-debug``' command @@ -1078,7 +1121,7 @@ The ``Statistic`` class & ``-stats`` option ------------------------------------------- The ``llvm/ADT/Statistic.h`` (`doxygen -`__) file provides a class +`__) file provides a class named ``Statistic`` that is used as a unified way to keep track of what the LLVM compiler is doing and how effective various optimizations are. It is useful to see what optimizations are contributing to making a particular program run @@ -1094,23 +1137,23 @@ uniform manner with the rest of the passes being executed. There are many examples of ``Statistic`` uses, but the basics of using it are as follows: -#. Define your statistic like this: +Define your statistic like this: - .. code-block:: c++ +.. code-block:: c++ - #define DEBUG_TYPE "mypassname" // This goes before any #includes. - STATISTIC(NumXForms, "The # of times I did stuff"); + #define DEBUG_TYPE "mypassname" // This goes before any #includes. + STATISTIC(NumXForms, "The # of times I did stuff"); - The ``STATISTIC`` macro defines a static variable, whose name is specified by - the first argument. The pass name is taken from the ``DEBUG_TYPE`` macro, and - the description is taken from the second argument. The variable defined - ("NumXForms" in this case) acts like an unsigned integer. +The ``STATISTIC`` macro defines a static variable, whose name is specified by +the first argument. The pass name is taken from the ``DEBUG_TYPE`` macro, and +the description is taken from the second argument. The variable defined +("NumXForms" in this case) acts like an unsigned integer. -#. Whenever you make a transformation, bump the counter: +Whenever you make a transformation, bump the counter: - .. code-block:: c++ +.. code-block:: c++ - ++NumXForms; // I did stuff! + ++NumXForms; // I did stuff! That's all you have to do. 
To get '``opt``' to print out the statistics gathered, use the '``-stats``' option: @@ -1158,6 +1201,71 @@ Obviously, with so many optimizations, having a unified framework for this stuff is very nice. Making your pass fit well into the framework makes it more maintainable and useful. +.. _DebugCounters: + +Adding debug counters to aid in debugging your code +--------------------------------------------------- + +Sometimes, when writing new passes, or trying to track down bugs, it +is useful to be able to control whether certain things in your pass +happen or not. For example, there are times the minimization tooling +can only easily give you large testcases. You would like to narrow +your bug down to a specific transformation happening or not happening, +automatically, using bisection. This is where debug counters help. +They provide a framework for making parts of your code only execute a +certain number of times. + +The ``llvm/Support/DebugCounter.h`` (`doxygen +`__) file +provides a class named ``DebugCounter`` that can be used to create +command line counter options that control execution of parts of your code. + +Define your DebugCounter like this: + +.. code-block:: c++ + + DEBUG_COUNTER(DeleteAnInstruction, "passname-delete-instruction", + "Controls which instructions get deleted"); + +The ``DEBUG_COUNTER`` macro defines a static variable, whose name +is specified by the first argument. The name of the counter +(which is used on the command line) is specified by the second +argument, and the description used in the help is specified by the +third argument. + +Whatever code you want to control, use ``DebugCounter::shouldExecute`` to control it. + +.. code-block:: c++ + + if (DebugCounter::shouldExecute(DeleteAnInstruction)) + I->eraseFromParent(); + +That's all you have to do. Now, using opt, you can control when this code triggers using +the '``--debug-counter``' option. There are two counters provided, ``skip`` and ``count``.
+``skip`` is the number of times to skip execution of the codepath. ``count`` is the number +of times, once we are done skipping, to execute the codepath. + +.. code-block:: none + + $ opt --debug-counter=passname-delete-instruction-skip=1,passname-delete-instruction-count=2 -passname + +This will skip the above code the first time we hit it, then execute it twice, then skip the rest of the executions. + +So if executed on the following code: + +.. code-block:: llvm + + %1 = add i32 %a, %b + %2 = add i32 %a, %b + %3 = add i32 %a, %b + %4 = add i32 %a, %b + +It would delete number ``%2`` and ``%3``. + +A utility is provided in `utils/bisect-skip-count` to binary search +skip and count arguments. It can be used to automatically minimize the +skip and count for a debug-counter variable. + .. _ViewGraph: Viewing graphs while debugging code @@ -2257,18 +2365,12 @@ of a ``BasicBlock`` and the number of ``Instruction``\ s it contains: .. code-block:: c++ - // func is a pointer to a Function instance - for (Function::iterator i = func->begin(), e = func->end(); i != e; ++i) + Function &Func = ... + for (BasicBlock &BB : Func) // Print out the name of the basic block if it has one, and then the // number of instructions that it contains - errs() << "Basic block (name=" << i->getName() << ") has " - << i->size() << " instructions.\n"; - -Note that i can be used as if it were a pointer for the purposes of invoking -member functions of the ``Instruction`` class. This is because the indirection -operator is overloaded for the iterator classes. In the above code, the -expression ``i->size()`` is exactly equivalent to ``(*i).size()`` just like -you'd expect. + errs() << "Basic block (name=" << BB.getName() << ") has " + << BB.size() << " instructions.\n"; .. _iterate_basicblock: @@ -2281,17 +2383,17 @@ a code snippet that prints out each instruction in a ``BasicBlock``: .. 
code-block:: c++ - // blk is a pointer to a BasicBlock instance - for (BasicBlock::iterator i = blk->begin(), e = blk->end(); i != e; ++i) + BasicBlock& BB = ... + for (Instruction &I : BB) // The next statement works since operator<<(ostream&,...) // is overloaded for Instruction& - errs() << *i << "\n"; + errs() << I << "\n"; However, this isn't really the best way to print out the contents of a ``BasicBlock``! Since the ostream operators are overloaded for virtually anything you'll care about, you could have just invoked the print routine on the -basic block itself: ``errs() << *blk << "\n";``. +basic block itself: ``errs() << BB << "\n";``. .. _iterate_insiter: @@ -2425,13 +2527,13 @@ method): OurFunctionPass(): callCounter(0) { } virtual runOnFunction(Function& F) { - for (Function::iterator b = F.begin(), be = F.end(); b != be; ++b) { - for (BasicBlock::iterator i = b->begin(), ie = b->end(); i != ie; ++i) { - if (CallInst* callInst = dyn_cast(&*i)) { + for (BasicBlock &B : F) { + for (Instruction &I: B) { + if (auto *CallInst = dyn_cast(&I)) { // We know we've encountered a call instruction, so we // need to determine if it's a call to the // function pointed to by m_func or not. - if (callInst->getCalledFunction() == targetFunc) + if (CallInst->getCalledFunction() == targetFunc) ++callCounter; } } @@ -2524,12 +2626,11 @@ iterate over all predecessors of BB: #include "llvm/IR/CFG.h" BasicBlock *BB = ...; - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *Pred = *PI; + for (BasicBlock *Pred : predecessors(BB)) { // ... } -Similarly, to iterate over successors use ``succ_iterator/succ_begin/succ_end``. +Similarly, to iterate over successors use ``successors``. .. _simplechanges: @@ -2554,7 +2655,7 @@ For example, an ``AllocaInst`` only *requires* a (const-ptr-to) ``Type``. Thus: .. 
code-block:: c++ - AllocaInst* ai = new AllocaInst(Type::Int32Ty); + auto *ai = new AllocaInst(Type::Int32Ty); will create an ``AllocaInst`` instance that represents the allocation of one integer in the current stack frame, at run time. Each ``Instruction`` subclass @@ -2579,7 +2680,7 @@ intending to use it within the same ``Function``. I might do: .. code-block:: c++ - AllocaInst* pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc"); + auto *pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc"); where ``indexLoc`` is now the logical name of the instruction's execution value, which is a pointer to an integer on the run time stack. @@ -2599,7 +2700,7 @@ sequence of instructions that form a ``BasicBlock``: BasicBlock *pb = ...; Instruction *pi = ...; - Instruction *newInst = new Instruction(...); + auto *newInst = new Instruction(...); pb->getInstList().insert(pi, newInst); // Inserts newInst before pi in pb @@ -2611,7 +2712,7 @@ sequence of instructions that form a ``BasicBlock``: .. code-block:: c++ BasicBlock *pb = ...; - Instruction *newInst = new Instruction(...); + auto *newInst = new Instruction(...); pb->getInstList().push_back(newInst); // Appends newInst to pb @@ -2620,7 +2721,7 @@ sequence of instructions that form a ``BasicBlock``: .. code-block:: c++ BasicBlock *pb = ...; - Instruction *newInst = new Instruction(..., pb); + auto *newInst = new Instruction(..., pb); which is much cleaner, especially if you are creating long instruction streams. @@ -2635,7 +2736,7 @@ sequence of instructions that form a ``BasicBlock``: .. code-block:: c++ Instruction *pi = ...; - Instruction *newInst = new Instruction(...); + auto *newInst = new Instruction(...); pi->getParent()->getInstList().insert(pi, newInst); @@ -2651,7 +2752,7 @@ sequence of instructions that form a ``BasicBlock``: .. 
code-block:: c++ Instruction* pi = ...; - Instruction* newInst = new Instruction(..., pi); + auto *newInst = new Instruction(..., pi); which is much cleaner, especially if you're creating a lot of instructions and adding them to ``BasicBlock``\ s. @@ -2718,7 +2819,7 @@ Replacing individual instructions """"""""""""""""""""""""""""""""" Including "`llvm/Transforms/Utils/BasicBlockUtils.h -`_" permits use of two +`_" permits use of two very useful replace functions: ``ReplaceInstWithValue`` and ``ReplaceInstWithInst``. @@ -2814,7 +2915,7 @@ is easier to read and write than the equivalent FunctionType *ft = FunctionType::get(Type::Int8Ty, params, false); See the `class comment -`_ for more details. +`_ for more details. .. _threading: @@ -2903,7 +3004,7 @@ Another way is to only call ``getPointerToFunction()`` from the When the JIT is configured to compile lazily (using ``ExecutionEngine::DisableLazyCompilation(false)``), there is currently a `race -condition `_ in updating call sites +condition `_ in updating call sites after a function is lazily-jitted. 
It's still possible to use the lazy JIT in a threaded program if you ensure that only one thread at a time can call any particular lazy stub and that the JIT lock guards any IR access, but we suggest @@ -3235,7 +3336,7 @@ The Core LLVM Class Hierarchy Reference ``#include "llvm/IR/Type.h"`` -header source: `Type.h `_ +header source: `Type.h `_ doxygen info: `Type Clases `_ @@ -3339,7 +3440,7 @@ The ``Module`` class ``#include "llvm/IR/Module.h"`` -header source: `Module.h `_ +header source: `Module.h `_ doxygen info: `Module Class `_ @@ -3426,7 +3527,7 @@ The ``Value`` class ``#include "llvm/IR/Value.h"`` -header source: `Value.h `_ +header source: `Value.h `_ doxygen info: `Value Class `_ @@ -3517,7 +3618,7 @@ The ``User`` class ``#include "llvm/IR/User.h"`` -header source: `User.h `_ +header source: `User.h `_ doxygen info: `User Class `_ @@ -3564,7 +3665,7 @@ The ``Instruction`` class ``#include "llvm/IR/Instruction.h"`` header source: `Instruction.h -`_ +`_ doxygen info: `Instruction Class `_ @@ -3712,7 +3813,7 @@ The ``GlobalValue`` class ``#include "llvm/IR/GlobalValue.h"`` header source: `GlobalValue.h -`_ +`_ doxygen info: `GlobalValue Class `_ @@ -3770,7 +3871,7 @@ The ``Function`` class ``#include "llvm/IR/Function.h"`` -header source: `Function.h `_ +header source: `Function.h `_ doxygen info: `Function Class `_ @@ -3879,7 +3980,7 @@ The ``GlobalVariable`` class ``#include "llvm/IR/GlobalVariable.h"`` header source: `GlobalVariable.h -`_ +`_ doxygen info: `GlobalVariable Class `_ @@ -3937,7 +4038,7 @@ The ``BasicBlock`` class ``#include "llvm/IR/BasicBlock.h"`` header source: `BasicBlock.h -`_ +`_ doxygen info: `BasicBlock Class `_ diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index cebc59001b27935930608a49139123ba1ad85ed6..dbffb53d5a51911c5a8b08e8d0733d5579f6fdd4 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -6,9 +6,9 @@ LLVM 5.0.0 Release Notes :local: .. 
warning:: - These are in-progress notes for the upcoming LLVM 5 release. You may - prefer the `LLVM 4 Release Notes `_. + These are in-progress notes for the upcoming LLVM 5 release. + Release notes for previous releases can be found on + `the Download Page `_. Introduction diff --git a/docs/ScudoHardenedAllocator.rst b/docs/ScudoHardenedAllocator.rst index a22051cca063009312ef876d219ed9fb8a6df4ec..e00c8324e55aba8a13c02cfe5d3f193d98cafb80 100644 --- a/docs/ScudoHardenedAllocator.rst +++ b/docs/ScudoHardenedAllocator.rst @@ -13,6 +13,13 @@ The Scudo Hardened Allocator is a user-mode allocator based on LLVM Sanitizer's CombinedAllocator, which aims at providing additional mitigations against heap based vulnerabilities, while maintaining good performance. +Currently, the allocator supports (was tested on) the following architectures: + +- i386 (& i686) (32-bit); +- x86_64 (64-bit); +- armhf (32-bit); +- AArch64 (64-bit). + The name "Scudo" has been retained from the initial implementation (Escudo meaning Shield in Spanish and Portuguese). @@ -31,29 +38,25 @@ header is accessed, and the process terminated. The following information is stored in the header: - the 16-bit checksum; -- the user requested size for that chunk, which is necessary for reallocation - purposes; +- the unused bytes amount for that chunk, which is necessary for computing the + size of the chunk; - the state of the chunk (available, allocated or quarantined); - the allocation type (malloc, new, new[] or memalign), to detect potential mismatches in the allocation APIs used; -- whether or not the chunk is offseted (ie: if the chunk beginning is different - than the backend allocation beginning, which is most often the case with some - aligned allocations); -- the associated offset; -- a 16-bit salt. +- the offset of the chunk, which is the distance in bytes from the beginning of + the returned chunk to the beginning of the backend allocation; +- an 8-bit salt.
-On x64, which is currently the only architecture supported, the header fits -within 16-bytes, which works nicely with the minimum alignment requirements. +This header fits within 8 bytes, on all platforms supported. -The checksum is computed as a CRC32 (requiring the SSE 4.2 instruction set) -of the global secret, the chunk pointer itself, and the 16 bytes of header with +The checksum is computed as a CRC32 (made faster with hardware support) +of the global secret, the chunk pointer itself, and the 8 bytes of header with the checksum field zeroed out. -The header is atomically loaded and stored to prevent races (this requires -platform support such as the cmpxchg16b instruction). This is important as two -consecutive chunks could belong to different threads. We also want to avoid -any type of double fetches of information located in the header, and use local -copies of the header for this purpose. +The header is atomically loaded and stored to prevent races. This is important +as two consecutive chunks could belong to different threads. We also want to +avoid any type of double fetches of information located in the header, and use +local copies of the header for this purpose. Delayed Freelist ----------------- @@ -94,9 +97,9 @@ You may also build Scudo like this: .. code:: cd $LLVM/projects/compiler-rt/lib - clang++ -fPIC -std=c++11 -msse4.2 -mcx16 -O2 -I. scudo/*.cpp \ + clang++ -fPIC -std=c++11 -msse4.2 -O2 -I. 
scudo/*.cpp \ $(\ls sanitizer_common/*.{cc,S} | grep -v "sanitizer_termination\|sanitizer_common_nolibc") \ - -shared -o scudo-allocator.so -lpthread + -shared -o scudo-allocator.so -pthread and then use it with existing binaries as follows: @@ -136,29 +139,29 @@ Or using the function: The following options are available: -+-----------------------------+---------+------------------------------------------------+ -| Option | Default | Description | -+-----------------------------+---------+------------------------------------------------+ -| QuarantineSizeMb | 64 | The size (in Mb) of quarantine used to delay | -| | | the actual deallocation of chunks. Lower value | -| | | may reduce memory usage but decrease the | -| | | effectiveness of the mitigation; a negative | -| | | value will fallback to a default of 64Mb. | -+-----------------------------+---------+------------------------------------------------+ -| ThreadLocalQuarantineSizeKb | 1024 | The size (in Kb) of per-thread cache use to | -| | | offload the global quarantine. Lower value may | -| | | reduce memory usage but might increase | -| | | contention on the global quarantine. | -+-----------------------------+---------+------------------------------------------------+ -| DeallocationTypeMismatch | true | Whether or not we report errors on | -| | | malloc/delete, new/free, new/delete[], etc. | -+-----------------------------+---------+------------------------------------------------+ -| DeleteSizeMismatch | true | Whether or not we report errors on mismatch | -| | | between sizes of new and delete. | -+-----------------------------+---------+------------------------------------------------+ -| ZeroContents | false | Whether or not we zero chunk contents on | -| | | allocation and deallocation. 
| -+-----------------------------+---------+------------------------------------------------+ ++-----------------------------+----------------+----------------+------------------------------------------------+ +| Option | 64-bit default | 32-bit default | Description | ++-----------------------------+----------------+----------------+------------------------------------------------+ +| QuarantineSizeMb | 64 | 16 | The size (in Mb) of quarantine used to delay | +| | | | the actual deallocation of chunks. Lower value | +| | | | may reduce memory usage but decrease the | +| | | | effectiveness of the mitigation; a negative | +| | | | value will fallback to a default of 64Mb. | ++-----------------------------+----------------+----------------+------------------------------------------------+ +| ThreadLocalQuarantineSizeKb | 1024 | 256 | The size (in Kb) of per-thread cache use to | +| | | | offload the global quarantine. Lower value may | +| | | | reduce memory usage but might increase | +| | | | contention on the global quarantine. | ++-----------------------------+----------------+----------------+------------------------------------------------+ +| DeallocationTypeMismatch | true | true | Whether or not we report errors on | +| | | | malloc/delete, new/free, new/delete[], etc. | ++-----------------------------+----------------+----------------+------------------------------------------------+ +| DeleteSizeMismatch | true | true | Whether or not we report errors on mismatch | +| | | | between sizes of new and delete. | ++-----------------------------+----------------+----------------+------------------------------------------------+ +| ZeroContents | false | false | Whether or not we zero chunk contents on | +| | | | allocation and deallocation. 
| ++-----------------------------+----------------+----------------+------------------------------------------------+ Allocator related common Sanitizer options can also be passed through Scudo options, such as ``allocator_may_return_null``. A detailed list including those diff --git a/docs/Statepoints.rst b/docs/Statepoints.rst index 29b1be37a893cce8934c52d1546d94f36e9100fd..7f2b20544812f6e3da79e11c2984097b9c7d93e5 100644 --- a/docs/Statepoints.rst +++ b/docs/Statepoints.rst @@ -831,7 +831,7 @@ Bugs and Enhancements Currently known bugs and enhancements under consideration can be tracked by performing a `bugzilla search -`_ +`_ for [Statepoint] in the summary field. When filing new bugs, please use this tag so that interested parties see the newly filed bug. As with most LLVM features, design discussions take place on `llvm-dev diff --git a/docs/TableGen/BackEnds.rst b/docs/TableGen/BackEnds.rst index fdab266fa31ce31c39e741f394a3cf59be5353af..993134386f76965299b5f64e269e19e687e881bc 100644 --- a/docs/TableGen/BackEnds.rst +++ b/docs/TableGen/BackEnds.rst @@ -228,6 +228,12 @@ CTags format. A helper script, utils/TableGen/tdtags, provides an easier-to-use interface; run 'tdtags -H' for documentation. +X86EVEX2VEX +----------- + +**Purpose**: This X86 specific tablegen backend emits tables that map EVEX +encoded instructions to their VEX encoded identical instruction. + Clang BackEnds ============== diff --git a/docs/TestingGuide.rst b/docs/TestingGuide.rst index 99616770d8e9ee37fd83362bee2a62d9b303dd42..a27da0de4d0e29de083c2bb470aaa7aedc841c0a 100644 --- a/docs/TestingGuide.rst +++ b/docs/TestingGuide.rst @@ -468,6 +468,25 @@ RUN lines: Expands to the path separator, i.e. ``:`` (or ``;`` on Windows). +``%/s, %/S, %/t, %/T:`` + + Act like the corresponding substitution above but replace any ``\`` + character with a ``/``. This is useful to normalize path separators. 
+ + Example: ``%s: C:\Desktop Files/foo_test.s.tmp`` + + Example: ``%/s: C:/Desktop Files/foo_test.s.tmp`` + +``%:s, %:S, %:t, %:T:`` + + Act like the corresponding substitution above but remove colons at + the beginning of Windows paths. This is useful to allow concatenation + of absolute paths on Windows to produce a legal path. + + Example: ``%s: C:\Desktop Files\foo_test.s.tmp`` + + Example: ``%:s: C\Desktop Files\foo_test.s.tmp`` + **LLVM-specific substitutions:** diff --git a/docs/XRay.rst b/docs/XRay.rst index 222cc8f2e049c0e898af2c97a3ca6181e8c7f7e6..d650319e99220f66674a970fde1b8516e1e9eb2b 100644 --- a/docs/XRay.rst +++ b/docs/XRay.rst @@ -28,8 +28,9 @@ XRay consists of three main parts: - A runtime library for enabling/disabling tracing at runtime. - A suite of tools for analysing the traces. - **NOTE:** As of the time of this writing, XRay is only available for x86_64 - and arm7 32-bit (no-thumb) Linux. + **NOTE:** As of February 27, 2017, XRay is only available for the following + architectures running Linux: x86_64, arm7 (no thumb), aarch64, powerpc64le, + mips, mipsel, mips64, mips64el. The compiler-inserted instrumentation points come in the form of nop-sleds in the final generated binary, and an ELF section named ``xray_instr_map`` which @@ -84,7 +85,10 @@ GCC-style attributes or C++11-style attributes. When linking a binary, you can either manually link in the `XRay Runtime Library`_ or use ``clang`` to link it in automatically with the -``-fxray-instrument`` flag. +``-fxray-instrument`` flag. Alternatively, you can statically link-in the XRay +runtime library from compiler-rt -- those archive files will take the name of +`libclang_rt.xray-{arch}` where `{arch}` is the mnemonic supported by clang +(x86_64, arm7, etc.). LLVM Function Attribute ----------------------- @@ -135,7 +139,7 @@ variable, where we list down the options and their defaults below.
+-------------------+-----------------+---------------+------------------------+ | Option | Type | Default | Description | +===================+=================+===============+========================+ -| patch_premain | ``bool`` | ``true`` | Whether to patch | +| patch_premain | ``bool`` | ``false`` | Whether to patch | | | | | instrumentation points | | | | | before main. | +-------------------+-----------------+---------------+------------------------+ @@ -146,6 +150,11 @@ variable, where we list down the options and their defaults below. | xray_logfile_base | ``const char*`` | ``xray-log.`` | Filename base for the | | | | | XRay logfile. | +-------------------+-----------------+---------------+------------------------+ +| xray_fdr_log | ``bool`` | ``false`` | Whether to install the | +| | | | Flight Data Recorder | +| | | | (FDR) mode. | ++-------------------+-----------------+---------------+------------------------+ + If you choose to not use the default logging implementation that comes with the XRay runtime and/or control when/how the XRay instrumentation runs, you may use @@ -175,6 +184,64 @@ thread-safety of operations to be performed by the XRay runtime library: XRay cannot guarantee that all threads that have ever gotten a copy of the pointer will not invoke the function. +Flight Data Recorder Mode +------------------------- + +XRay supports a logging mode which allows the application to only capture a +fixed amount of memory's worth of events. Flight Data Recorder (FDR) mode works +very much like a plane's "black box" which keeps recording data to memory in a +fixed-size circular queue of buffers, and makes the data available +programmatically until the buffers are finalized and flushed. To use FDR mode +on your application, you may set the ``xray_fdr_log`` option to ``true`` in the +``XRAY_OPTIONS`` environment variable (while also optionally setting the +``xray_naive_log`` to ``false``).
+ +When FDR mode is on, it will keep writing and recycling memory buffers until +the logging implementation is finalized -- at which point it can be flushed and +re-initialised later. To do this programmatically, we follow the workflow +provided below: + +.. code-block:: c++ + + // Patch the sleds, if we haven't yet. + auto patch_status = __xray_patch(); + + // Maybe handle the patch_status errors. + + // When we want to flush the log, we need to finalize it first, to give + // threads a chance to return buffers to the queue. + auto finalize_status = __xray_log_finalize(); + if (finalize_status != XRAY_LOG_FINALIZED) { + // maybe retry, or bail out. + } + + // At this point, we are sure that the log is finalized, so we may try + // flushing the log. + auto flush_status = __xray_log_flushLog(); + if (flush_status != XRAY_LOG_FLUSHED) { + // maybe retry, or bail out. + } + +The default settings for the FDR mode implementation will create logs named +similarly to the naive log implementation, but will have a different log +format. All the trace analysis tools (and the trace reading library) will +support all versions of the FDR mode format as we add more functionality and +record types in the future. + + **NOTE:** We do not however promise perpetual support for when we update the + log versions we support going forward. Deprecation of the formats will be + announced and discussed on the developers mailing list. + +XRay allows for replacing the default FDR mode logging implementation using the +following API: + +- ``__xray_set_log_impl(...)``: This function takes a struct of type + ``XRayLogImpl``, which is defined in ``xray/xray_log_interface.h``, part of + the XRay compiler-rt installation. +- ``__xray_log_init(...)``: This function allows for initializing and + re-initializing an installed logging implementation. See + ``xray/xray_log_interface.h`` for details, part of the XRay compiler-rt + installation. 
Trace Analysis Tools -------------------- @@ -185,7 +252,26 @@ supports the following subcommands: - ``extract``: Extract the instrumentation map from a binary, and return it as YAML. - +- ``account``: Performs basic function call accounting statistics with various + options for sorting, and output formats (supports CSV, YAML, and + console-friendly TEXT). +- ``convert``: Converts an XRay log file from one format to another. Currently + only converts to YAML. +- ``graph``: Generates a DOT graph of the function call relationships between + functions found in an XRay trace. + +These subcommands use various library components found as part of the XRay +libraries, distributed with the LLVM distribution. These are: + +- ``llvm/XRay/Trace.h`` : A trace reading library for conveniently loading + an XRay trace of supported forms, into a convenient in-memory representation. + All the analysis tools that deal with traces use this implementation. +- ``llvm/XRay/Graph.h`` : A semi-generic graph type used by the graph + subcommand to conveniently represent a function call graph with statistics + associated with edges and vertices. +- ``llvm/XRay/InstrumentationMap.h``: A convenient tool for analyzing the + instrumentation map in XRay-instrumented object files and binaries. The + ``extract`` subcommand uses this particular library. Future Work =========== @@ -193,38 +279,19 @@ Future Work There are a number of ongoing efforts for expanding the toolset building around the XRay instrumentation system. -Flight Data Recorder Mode -------------------------- - -The `XRay whitepaper`_ mentions a mode for when events are kept in memory, and -have the traces be dumped on demand through a triggering API. This work is -currently ongoing. - Trace Analysis -------------- -There are a few more subcommands making its way to the ``llvm-xray`` tool, that -are currently under review: - -- ``convert``: Turns an XRay trace from one format to another. 
Currently - supporting conversion from the binary XRay log to YAML. -- ``account``: Do function call accounting based on data in the XRay log. - We have more subcommands and modes that we're thinking of developing, in the following forms: - ``stack``: Reconstruct the function call stacks in a timeline. -- ``convert``: Converting from one version of the XRay log to another (higher) - version, and converting to other trace formats (i.e. Chrome Trace Viewer, - pprof, etc.). -- ``graph``: Generate a function call graph with relative timings and distributions. More Platforms -------------- -Since XRay is only currently available in x86_64 and arm7 32-bit (no-thumb) -running Linux, we're looking to supporting more platforms (architectures and -operating systems). +We're looking forward to contributions to port XRay to more architectures and +operating systems. .. References... diff --git a/docs/XRayExample.rst b/docs/XRayExample.rst new file mode 100644 index 0000000000000000000000000000000000000000..5dfb0bfaf29893f878442120de72487bfb57dad8 --- /dev/null +++ b/docs/XRayExample.rst @@ -0,0 +1,273 @@ +=================== +Debugging with XRay +=================== + +This document shows an example of how you would go about analyzing applications +built with XRay instrumentation. Here we will attempt to debug ``llc`` +compiling some sample LLVM IR generated by Clang. + +.. contents:: + :local: + +Building with XRay +------------------ + +To debug an application with XRay instrumentation, we need to build it with a +Clang that supports the ``-fxray-instrument`` option. See `XRay `_ +for more technical details of how XRay works for background information. + +In our example, we need to add ``-fxray-instrument`` to the list of flags +passed to Clang when building a binary. Note that we need to link with Clang as +well to get the XRay runtime linked in appropriately. 
For building ``llc`` with +XRay, we do something similar below for our LLVM build: + +:: + + $ mkdir -p llvm-build && cd llvm-build + # Assume that the LLVM sources are at ../llvm + $ cmake -GNinja ../llvm -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_C_FLAGS_RELEASE="-fxray-instrument" -DCMAKE_CXX_FLAGS="-fxray-instrument" \ + # Once this finishes, we should build llc + $ ninja llc + + +To verify that we have an XRay instrumented binary, we can use ``objdump`` to +look for the ``xray_instr_map`` section. + +:: + + $ objdump -h -j xray_instr_map ./bin/llc + ./bin/llc: file format elf64-x86-64 + + Sections: + Idx Name Size VMA LMA File off Algn + 14 xray_instr_map 00002fc0 00000000041516c6 00000000041516c6 03d516c6 2**0 + CONTENTS, ALLOC, LOAD, READONLY, DATA + +Getting Traces +-------------- + +By default, XRay does not write out the trace files or patch the application +before main starts. If we just run ``llc`` it should just work like a normally +built binary. However, if we want to get a full trace of the application's +operations (of the functions we do end up instrumenting with XRay) then we need +to enable XRay at application start. To do this, XRay checks the +``XRAY_OPTIONS`` environment variable. + +:: + + # The following doesn't create an XRay trace by default. + $ ./bin/llc input.ll + + # We need to set the XRAY_OPTIONS to enable some features. + $ XRAY_OPTIONS="patch_premain=true" ./bin/llc input.ll + ==69819==XRay: Log file in 'xray-log.llc.m35qPB' + +At this point we now have an XRay trace we can start analysing. + +The ``llvm-xray`` Tool +---------------------- + +Having a trace then allows us to do basic accounting of the functions that were +instrumented, and how much time we're spending in parts of the code. To make +sense of this data, we use the ``llvm-xray`` tool which has a few subcommands +to help us understand our trace. + +One of the simplest things we can do is to get an accounting of the functions +that have been instrumented. 
We can see an example accounting with ``llvm-xray +account``: + +:: + + $ llvm-xray account xray-log.llc.m35qPB -top=10 -sort=sum -sortorder=dsc -instr_map ./bin/llc + Functions with latencies: 29 + funcid count [ min, med, 90p, 99p, max] sum function + 187 360 [ 0.000000, 0.000001, 0.000014, 0.000032, 0.000075] 0.001596 LLLexer.cpp:446:0: llvm::LLLexer::LexIdentifier() + 85 130 [ 0.000000, 0.000000, 0.000018, 0.000023, 0.000156] 0.000799 X86ISelDAGToDAG.cpp:1984:0: (anonymous namespace)::X86DAGToDAGISel::Select(llvm::SDNode*) + 138 130 [ 0.000000, 0.000000, 0.000017, 0.000155, 0.000155] 0.000774 SelectionDAGISel.cpp:2963:0: llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) + 188 103 [ 0.000000, 0.000000, 0.000003, 0.000123, 0.000214] 0.000737 LLParser.cpp:2692:0: llvm::LLParser::ParseValID(llvm::ValID&, llvm::LLParser::PerFunctionState*) + 88 1 [ 0.000562, 0.000562, 0.000562, 0.000562, 0.000562] 0.000562 X86ISelLowering.cpp:83:0: llvm::X86TargetLowering::X86TargetLowering(llvm::X86TargetMachine const&, llvm::X86Subtarget const&) + 125 102 [ 0.000001, 0.000003, 0.000010, 0.000017, 0.000049] 0.000471 Verifier.cpp:3714:0: (anonymous namespace)::Verifier::visitInstruction(llvm::Instruction&) + 90 8 [ 0.000023, 0.000035, 0.000106, 0.000106, 0.000106] 0.000342 X86ISelLowering.cpp:3363:0: llvm::X86TargetLowering::LowerCall(llvm::TargetLowering::CallLoweringInfo&, llvm::SmallVectorImpl&) const + 124 32 [ 0.000003, 0.000007, 0.000016, 0.000041, 0.000041] 0.000310 Verifier.cpp:1967:0: (anonymous namespace)::Verifier::visitFunction(llvm::Function const&) + 123 1 [ 0.000302, 0.000302, 0.000302, 0.000302, 0.000302] 0.000302 LLVMContextImpl.cpp:54:0: llvm::LLVMContextImpl::~LLVMContextImpl() + 139 46 [ 0.000000, 0.000002, 0.000006, 0.000008, 0.000019] 0.000138 TargetLowering.cpp:506:0: llvm::TargetLowering::SimplifyDemandedBits(llvm::SDValue, llvm::APInt const&, llvm::APInt&, llvm::APInt&, llvm::TargetLowering::TargetLoweringOpt&, 
unsigned int, bool) const + +This shows us that for our input file, ``llc`` spent the most cumulative time +in the lexer (a total of 1 millisecond). If we wanted for example to work with +this data in a spreadsheet, we can output the results as CSV using the +``-format=csv`` option to the command for further analysis. + +If we want to get a textual representation of the raw trace we can use the +``llvm-xray convert`` tool to get YAML output. The first few lines of that +output for an example trace would look like the following: + +:: + + $ llvm-xray convert -f yaml -symbolize -instr_map=./bin/llc xray-log.llc.m35qPB + --- + header: + version: 1 + type: 0 + constant-tsc: true + nonstop-tsc: true + cycle-frequency: 2601000000 + records: + - { type: 0, func-id: 110, function: __cxx_global_var_init.8, cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426023268520 } + - { type: 0, func-id: 110, function: __cxx_global_var_init.8, cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426023523052 } + - { type: 0, func-id: 164, function: __cxx_global_var_init, cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426029925386 } + - { type: 0, func-id: 164, function: __cxx_global_var_init, cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426030031128 } + - { type: 0, func-id: 142, function: '(anonymous namespace)::CommandLineParser::ParseCommandLineOptions(int, char const* const*, llvm::StringRef, llvm::raw_ostream*)', cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426046951388 } + - { type: 0, func-id: 142, function: '(anonymous namespace)::CommandLineParser::ParseCommandLineOptions(int, char const* const*, llvm::StringRef, llvm::raw_ostream*)', cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426047282020 } + - { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426047857332 } + - { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind:
function-exit, tsc: 5434426047984152 } + - { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426048036584 } + - { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426048042292 } + - { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426048055056 } + - { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426048067316 } + +Controlling Fidelity +-------------------- + +So far in our examples, we haven't been getting full coverage of the functions +we have in the binary. To get that, we need to modify the compiler flags so +that we can instrument more (if not all) the functions we have in the binary. +We have two options for doing that, and we explore both of these below. + +Instruction Threshold +````````````````````` + +The first "blunt" way of doing this is by setting the minimum threshold for +function bodies to 1. We can do that with the +``-fxray-instruction-threshold=N`` flag when building our binary. 
We rebuild +``llc`` with this option and observe the results: + +:: + + $ rm CMakeCache.txt + $ cmake -GNinja ../llvm -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_C_FLAGS_RELEASE="-fxray-instrument -fxray-instruction-threshold=1" \ + -DCMAKE_CXX_FLAGS="-fxray-instrument -fxray-instruction-threshold=1" + $ ninja llc + $ XRAY_OPTIONS="patch_premain=true" ./bin/llc input.ll + ==69819==XRay: Log file in 'xray-log.llc.5rqxkU' + + $ llvm-xray account xray-log.llc.5rqxkU -top=10 -sort=sum -sortorder=dsc -instr_map ./bin/llc + Functions with latencies: 36652 + funcid count [ min, med, 90p, 99p, max] sum function + 75 1 [ 0.672368, 0.672368, 0.672368, 0.672368, 0.672368] 0.672368 llc.cpp:271:0: main + 78 1 [ 0.626455, 0.626455, 0.626455, 0.626455, 0.626455] 0.626455 llc.cpp:381:0: compileModule(char**, llvm::LLVMContext&) + 139617 1 [ 0.472618, 0.472618, 0.472618, 0.472618, 0.472618] 0.472618 LegacyPassManager.cpp:1723:0: llvm::legacy::PassManager::run(llvm::Module&) + 139610 1 [ 0.472618, 0.472618, 0.472618, 0.472618, 0.472618] 0.472618 LegacyPassManager.cpp:1681:0: llvm::legacy::PassManagerImpl::run(llvm::Module&) + 139612 1 [ 0.470948, 0.470948, 0.470948, 0.470948, 0.470948] 0.470948 LegacyPassManager.cpp:1564:0: (anonymous namespace)::MPPassManager::runOnModule(llvm::Module&) + 139607 2 [ 0.147345, 0.315994, 0.315994, 0.315994, 0.315994] 0.463340 LegacyPassManager.cpp:1530:0: llvm::FPPassManager::runOnModule(llvm::Module&) + 139605 21 [ 0.000002, 0.000002, 0.102593, 0.213336, 0.213336] 0.463331 LegacyPassManager.cpp:1491:0: llvm::FPPassManager::runOnFunction(llvm::Function&) + 139563 26096 [ 0.000002, 0.000002, 0.000037, 0.000063, 0.000215] 0.225708 LegacyPassManager.cpp:1083:0: llvm::PMDataManager::findAnalysisPass(void const*, bool) + 108055 188 [ 0.000002, 0.000120, 0.001375, 0.004523, 0.062624] 0.159279 MachineFunctionPass.cpp:38:0: llvm::MachineFunctionPass::runOnFunction(llvm::Function&) + 62635 22 [ 0.000041, 0.000046, 0.000050, 0.126744, 0.126744] 0.127715 
X86TargetMachine.cpp:242:0: llvm::X86TargetMachine::getSubtargetImpl(llvm::Function const&) const + + +Instrumentation Attributes +`````````````````````````` + +The other way is to use configuration files for selecting which functions +should always be instrumented by the compiler. This gives us a way of ensuring +that certain functions are either always or never instrumented without having to +add the attribute to the source. + +To use this feature, you can define one file for the functions to always +instrument, and another for functions to never instrument. The format of these +files is exactly the same as the SanitizerLists files that control similar +things for the sanitizer implementations. For example, we can have two +different files like below: + +:: + + # always-instrument.txt + # always instrument functions that match the following filters: + fun:main + + # never-instrument.txt + # never instrument functions that match the following filters: + fun:__cxx_* + +Given the above two files we can re-build by providing those two files as +arguments to clang as ``-fxray-always-instrument=always-instrument.txt`` or +``-fxray-never-instrument=never-instrument.txt``. + +Further Exploration +------------------- + +The ``llvm-xray`` tool has a few other subcommands that are in various stages +of being developed. One interesting subcommand that can highlight a few +interesting things is the ``graph`` subcommand. Given for example the following +toy program that we build with XRay instrumentation, we can see how the +generated graph may be a helpful indicator of where time is being spent for the +application. + +..
code-block:: c++ + + // sample.cc + #include <iostream> + #include <thread> + + [[clang::xray_always_instrument]] void f() { + std::cerr << '.'; + } + + [[clang::xray_always_instrument]] void g() { + for (int i = 0; i < 1 << 10; ++i) { + std::cerr << '-'; + } + } + + int main(int argc, char* argv[]) { + std::thread t1([] { + for (int i = 0; i < 1 << 10; ++i) + f(); + }); + std::thread t2([] { + g(); + }); + t1.join(); + t2.join(); + std::cerr << '\n'; + } + +We then build the above with XRay instrumentation: + +:: + + $ clang++ -o sample -O3 sample.cc -std=c++11 -fxray-instrument -fxray-instruction-threshold=1 + $ XRAY_OPTIONS="patch_premain=true" ./sample + +We can then explore the graph rendering of the trace generated by this sample +application. We assume you have the Graphviz tools available in your system, +including both ``unflatten`` and ``dot``. If you prefer rendering or exploring +the graph using another tool, then that should be feasible as well. ``llvm-xray +graph`` will create DOT format graphs which should be usable in most graph +rendering applications. One example invocation of the ``llvm-xray graph`` +command should yield some interesting insights into the workings of C++ +applications: + +:: + + $ llvm-xray graph xray-log.sample.* -m sample -color-edges=sum -edge-label=sum \ + | unflatten -f -l10 | dot -Tsvg -o sample.svg + +Next Steps +---------- + +If you have some interesting analyses you'd like to implement as part of the +llvm-xray tool, please feel free to propose them on the llvm-dev@ mailing list. +The following are some ideas to inspire you in getting involved and potentially +making things better. + + - Implement a query/filtering library that allows for finding patterns in the + XRay traces. + - A conversion from the XRay trace onto something that can be visualised + better by other tools (like the Chrome trace viewer for example). + - Collecting function call stacks and how often they're encountered in the + XRay trace.
+ + diff --git a/docs/conf.py b/docs/conf.py index cd9142cefdf2da2eef22639beb9a0a98d309b5b9..e7c18da48ebef9ca14ba5d4526bdfe7cffbc0fd5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -251,3 +251,7 @@ for name in os.listdir(command_guide_path): # FIXME: Define intersphinx configuration. intersphinx_mapping = {} + +# Pygments lexers are sometimes out of date (when parsing LLVM for example) or +# wrong. Suppress the warning so the build doesn't abort. +suppress_warnings = [ 'misc.highlighting_failure' ] diff --git a/docs/index.rst b/docs/index.rst index 341a9c16325b9ab390e79eb8770d9cb931347b05..fe47eb1bcb7f7b38343412429548d84ae98999a5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -274,6 +274,7 @@ For API clients and LLVM developers. Coroutines GlobalISel XRay + XRayExample PDB/index :doc:`WritingAnLLVMPass` @@ -399,6 +400,9 @@ For API clients and LLVM developers. :doc:`XRay` High-level documentation of how to use XRay in LLVM. +:doc:`XRayExample` + An example of how to debug an application with XRay. + :doc:`The Microsoft PDB File Format ` A detailed description of the Microsoft PDB (Program Database) file format. diff --git a/docs/tutorial/BuildingAJIT1.rst b/docs/tutorial/BuildingAJIT1.rst index 80957ee620f0fb66ee33ebed6b9834ce23c504ce..625cbbba1a5cc8c59b85ce264b38d1370fe61b52 100644 --- a/docs/tutorial/BuildingAJIT1.rst +++ b/docs/tutorial/BuildingAJIT1.rst @@ -125,14 +125,12 @@ usual include guards and #includes [2]_, we get to the definition of our class: class KaleidoscopeJIT { private: - std::unique_ptr TM; const DataLayout DL; ObjectLinkingLayer<> ObjectLayer; IRCompileLayer CompileLayer; public: - typedef decltype(CompileLayer)::ModuleSetHandleT ModuleHandleT; Our class begins with four members: A TargetMachine, TM, which will be used @@ -152,16 +150,16 @@ compiling it, and passing the resulting in-memory object files down to the object linking layer below. That's it for member variables, after that we have a single typedef: -ModuleHandle.
This is the handle type that will be returned from our JIT's +ModuleHandleT. This is the handle type that will be returned from our JIT's addModule method, and can be passed to the removeModule method to remove a module. The IRCompileLayer class already provides a convenient handle type -(IRCompileLayer::ModuleSetHandleT), so we just alias our ModuleHandle to this. +(IRCompileLayer::ModuleSetHandleT), so we just alias our ModuleHandleT to this. .. code-block:: c++ KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), - CompileLayer(ObjectLayer, SimpleCompiler(*TM)) { + CompileLayer(ObjectLayer, SimpleCompiler(*TM)) { llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr); } @@ -200,7 +198,7 @@ available for execution. return JITSymbol(nullptr); }); - // Build a singlton module set to hold our module. + // Build a singleton module set to hold our module. std::vector> Ms; Ms.push_back(std::move(M)); @@ -259,16 +257,16 @@ were linked into a single, ever-growing logical dylib. To implement this our first lambda (the one defining findSymbolInLogicalDylib) will just search for JIT'd code by calling the CompileLayer's findSymbol method. If we don't find a symbol in the JIT itself we'll fall back to our second lambda, which implements -findSymbol. This will use the RTDyldMemoyrManager::getSymbolAddressInProcess +findSymbol. This will use the RTDyldMemoryManager::getSymbolAddressInProcess method to search for the symbol within the program itself. If we can't find a -symbol definition via either of these paths the JIT will refuse to accept our +symbol definition via either of these paths, the JIT will refuse to accept our module, returning a "symbol not found" error. -Now that we've built our symbol resolver we're ready to add our module to the +Now that we've built our symbol resolver, we're ready to add our module to the JIT. We do this by calling the CompileLayer's addModuleSet method [4]_. 
Since we only have a single Module and addModuleSet expects a collection, we will create a vector of modules and add our module as the only member. Since we -have already typedef'd our ModuleHandle type to be the same as the +have already typedef'd our ModuleHandleT type to be the same as the CompileLayer's handle type, we can return the handle from addModuleSet directly from our addModule method. @@ -304,7 +302,7 @@ treated as a duplicate definition when the next top-level expression is entered. It is generally good to free any module that you know you won't need to call further, just to free up the resources dedicated to it. However, you don't strictly need to do this: All resources will be cleaned up when your -JIT class is destructed, if the haven't been freed before then. +JIT class is destructed, if they haven't been freed before then. This brings us to the end of Chapter 1 of Building a JIT. You now have a basic but fully functioning JIT stack that you can use to take LLVM IR and make it diff --git a/docs/tutorial/LangImpl02.rst b/docs/tutorial/LangImpl02.rst index ac8d2d79874325b50e762e4c278df3b6440d18f1..4be447eb5ba35da9c675e0729ad066b1523dbfd8 100644 --- a/docs/tutorial/LangImpl02.rst +++ b/docs/tutorial/LangImpl02.rst @@ -119,6 +119,8 @@ way to talk about functions themselves: public: PrototypeAST(const std::string &name, std::vector Args) : Name(name), Args(std::move(Args)) {} + + const std::string &getName() const { return Name; } }; /// FunctionAST - This class represents a function definition itself. diff --git a/docs/tutorial/LangImpl03.rst b/docs/tutorial/LangImpl03.rst index 2bb3a300026e0ea1c29fc56f8a87f250dff10d53..1dfe10175c747a0c41011c0f9fe1b4c6414b740b 100644 --- a/docs/tutorial/LangImpl03.rst +++ b/docs/tutorial/LangImpl03.rst @@ -122,7 +122,7 @@ First we'll do numeric literals: .. 
code-block:: c++ Value *NumberExprAST::codegen() { - return ConstantFP::get(LLVMContext, APFloat(Val)); + return ConstantFP::get(TheContext, APFloat(Val)); } In the LLVM IR, numeric constants are represented with the @@ -171,7 +171,7 @@ variables `_. case '<': L = Builder.CreateFCmpULT(L, R, "cmptmp"); // Convert bool 0/1 to double 0.0 or 1.0 - return Builder.CreateUIToFP(L, Type::getDoubleTy(LLVMContext), + return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp"); default: return LogErrorV("invalid binary operator"); @@ -270,9 +270,9 @@ with: Function *PrototypeAST::codegen() { // Make the function type: double(double,double) etc. std::vector Doubles(Args.size(), - Type::getDoubleTy(LLVMContext)); + Type::getDoubleTy(TheContext)); FunctionType *FT = - FunctionType::get(Type::getDoubleTy(LLVMContext), Doubles, false); + FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false); Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); @@ -346,7 +346,7 @@ assert that the function is empty (i.e. has no body yet) before we start. .. code-block:: c++ // Create a new basic block to start insertion into. - BasicBlock *BB = BasicBlock::Create(LLVMContext, "entry", TheFunction); + BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction); Builder.SetInsertPoint(BB); // Record the function arguments in the NamedValues map. @@ -533,7 +533,8 @@ This shows an extern for the libm "cos" function, and a call to it. ret double %calltmp } -When you quit the current demo, it dumps out the IR for the entire +When you quit the current demo (by sending an EOF via CTRL+D on Linux +or CTRL+Z and ENTER on Windows), it dumps out the IR for the entire module generated. Here you can see the big picture with all the functions referencing each other. 
diff --git a/docs/tutorial/LangImpl04.rst b/docs/tutorial/LangImpl04.rst index 513bf8f4ab45ea02b110df118c58d90ebfbc72b9..16d7164ae15ee244b3e352118b1e87af348baeb3 100644 --- a/docs/tutorial/LangImpl04.rst +++ b/docs/tutorial/LangImpl04.rst @@ -131,33 +131,29 @@ for us: void InitializeModuleAndPassManager(void) { // Open a new module. - Context LLVMContext; - TheModule = llvm::make_unique("my cool jit", LLVMContext); - TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); + TheModule = llvm::make_unique("my cool jit", TheContext); // Create a new pass manager attached to it. TheFPM = llvm::make_unique(TheModule.get()); - // Provide basic AliasAnalysis support for GVN. - TheFPM.add(createBasicAliasAnalysisPass()); // Do simple "peephole" optimizations and bit-twiddling optzns. - TheFPM.add(createInstructionCombiningPass()); + TheFPM->add(createInstructionCombiningPass()); // Reassociate expressions. - TheFPM.add(createReassociatePass()); + TheFPM->add(createReassociatePass()); // Eliminate Common SubExpressions. - TheFPM.add(createGVNPass()); + TheFPM->add(createGVNPass()); // Simplify the control flow graph (deleting unreachable blocks, etc). - TheFPM.add(createCFGSimplificationPass()); + TheFPM->add(createCFGSimplificationPass()); - TheFPM.doInitialization(); + TheFPM->doInitialization(); } This code initializes the global module ``TheModule``, and the function pass manager ``TheFPM``, which is attached to ``TheModule``. Once the pass manager is set up, we use a series of "add" calls to add a bunch of LLVM passes. -In this case, we choose to add five passes: one analysis pass (alias analysis), -and four optimization passes. The passes we choose here are a pretty standard set +In this case, we choose to add four optimization passes. +The passes we choose here are a pretty standard set of "cleanup" optimizations that are useful for a wide variety of code. I won't delve into what they do but, believe me, they are a good starting place :). 
@@ -227,8 +223,10 @@ expressions they type in. For example, if they type in "1 + 2;", we should evaluate and print out 3. If they define a function, they should be able to call it from the command line. -In order to do this, we first declare and initialize the JIT. This is -done by adding a global variable ``TheJIT``, and initializing it in +In order to do this, we first prepare the environment to create code for +the current native target and declare and initialize the JIT. This is +done by calling some ``InitializeNativeTarget\*`` functions and +adding a global variable ``TheJIT``, and initializing it in ``main``: .. code-block:: c++ @@ -236,7 +234,21 @@ done by adding a global variable ``TheJIT``, and initializing it in static std::unique_ptr TheJIT; ... int main() { - .. + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + InitializeNativeTargetAsmParser(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + TheJIT = llvm::make_unique(); // Run the main "interpreter loop" now. @@ -245,9 +257,24 @@ done by adding a global variable ``TheJIT``, and initializing it in return 0; } +We also need to setup the data layout for the JIT: + +.. code-block:: c++ + + void InitializeModuleAndPassManager(void) { + // Open a new module. + TheModule = llvm::make_unique("my cool jit", TheContext); + TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); + + // Create a new pass manager attached to it. + TheFPM = llvm::make_unique(TheModule.get()); + ... + The KaleidoscopeJIT class is a simple JIT built specifically for these -tutorials. In later chapters we will look at how it works and extend it with -new features, but for now we will take it as given. 
Its API is very simple:: +tutorials, available inside the LLVM source code +at llvm-src/examples/Kaleidoscope/include/KaleidoscopeJIT.h. +In later chapters we will look at how it works and extend it with +new features, but for now we will take it as given. Its API is very simple: ``addModule`` adds an LLVM IR module to the JIT, making its functions available for execution; ``removeModule`` removes a module, freeing any memory associated with the code in that module; and ``findSymbol`` allows us @@ -554,7 +581,10 @@ most recent to the oldest, to find the newest definition. If no definition is found inside the JIT, it falls back to calling "``dlsym("sin")``" on the Kaleidoscope process itself. Since "``sin``" is defined within the JIT's address space, it simply patches up calls in the module to call the libm -version of ``sin`` directly. +version of ``sin`` directly. But in some cases this even goes further: +as ``sin`` and ``cos`` are names of standard math functions, the constant folder +will directly evaluate the function calls to the correct result when called +with constants like in the "``sin(1.0)``" above. In the future we'll see how tweaking this symbol resolution rule can be used to enable all sorts of useful features, from security (restricting the set of @@ -567,12 +597,21 @@ if we add: .. code-block:: c++ + #ifdef LLVM_ON_WIN32 + #define DLLEXPORT __declspec(dllexport) + #else + #define DLLEXPORT + #endif + /// putchard - putchar that takes a double and returns 0. - extern "C" double putchard(double X) { + extern "C" DLLEXPORT double putchard(double X) { fputc((char)X, stderr); return 0; } +Note that on Windows we need to actually export the functions because +the dynamic symbol loader will use GetProcAddress to find the symbols. + Now we can produce simple output to the console by using things like: "``extern putchard(x); putchard(120);``", which prints a lowercase 'x' on the console (120 is the ASCII code for 'x').
Similar code could be diff --git a/docs/tutorial/LangImpl05.rst b/docs/tutorial/LangImpl05.rst index ae0935d9ba1f976a7812b4654954f7b9c2e31806..dcf45bcbf8d20c2d9fdee20c256f8ad1347f20c4 100644 --- a/docs/tutorial/LangImpl05.rst +++ b/docs/tutorial/LangImpl05.rst @@ -103,7 +103,8 @@ To represent the new expression we add a new AST node for it: IfExprAST(std::unique_ptr Cond, std::unique_ptr Then, std::unique_ptr Else) : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} - virtual Value *codegen(); + + Value *codegen() override; }; The AST node just has pointers to the various subexpressions. @@ -290,9 +291,9 @@ for ``IfExprAST``: if (!CondV) return nullptr; - // Convert condition to a bool by comparing equal to 0.0. + // Convert condition to a bool by comparing non-equal to 0.0. CondV = Builder.CreateFCmpONE( - CondV, ConstantFP::get(LLVMContext, APFloat(0.0)), "ifcond"); + CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); This code is straightforward and similar to what we saw before. We emit the expression for the condition, then compare that value to zero to get @@ -305,9 +306,9 @@ a truth value as a 1-bit (bool) value. // Create blocks for the then and else cases. Insert the 'then' block at the // end of the function. 
BasicBlock *ThenBB = - BasicBlock::Create(LLVMContext, "then", TheFunction); - BasicBlock *ElseBB = BasicBlock::Create(LLVMContext, "else"); - BasicBlock *MergeBB = BasicBlock::Create(LLVMContext, "ifcont"); + BasicBlock::Create(TheContext, "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else"); + BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont"); Builder.CreateCondBr(CondV, ThenBB, ElseBB); @@ -400,7 +401,7 @@ code: TheFunction->getBasicBlockList().push_back(MergeBB); Builder.SetInsertPoint(MergeBB); PHINode *PN = - Builder.CreatePHI(Type::getDoubleTy(LLVMContext), 2, "iftmp"); + Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp"); PN->addIncoming(ThenV, ThenBB); PN->addIncoming(ElseV, ElseBB); @@ -433,7 +434,7 @@ something more aggressive, a 'for' expression: :: - extern putchard(char) + extern putchard(char); def printstar(n) for i = 1, i < n, 1.0 in putchard(42); # ascii 42 = '*' @@ -500,7 +501,8 @@ variable name and the constituent expressions in the node. std::unique_ptr Body) : VarName(VarName), Start(std::move(Start)), End(std::move(End)), Step(std::move(Step)), Body(std::move(Body)) {} - virtual Value *codegen(); + + Value *codegen() override; }; Parser Extensions for the 'for' Loop @@ -561,6 +563,27 @@ value to null in the AST node: std::move(Body)); } +And again we hook it up as a primary expression: + +.. code-block:: c++ + + static std::unique_ptr ParsePrimary() { + switch (CurTok) { + default: + return LogError("unknown token when expecting an expression"); + case tok_identifier: + return ParseIdentifierExpr(); + case tok_number: + return ParseNumberExpr(); + case '(': + return ParseParenExpr(); + case tok_if: + return ParseIfExpr(); + case tok_for: + return ParseForExpr(); + } + } + LLVM IR for the 'for' Loop -------------------------- @@ -610,7 +633,8 @@ expression for the loop value: Value *ForExprAST::codegen() { // Emit the start code first, without 'variable' in scope. 
Value *StartVal = Start->codegen(); - if (StartVal == 0) return 0; + if (!StartVal) + return nullptr; With this out of the way, the next step is to set up the LLVM basic block for the start of the loop body. In the case above, the whole loop @@ -625,7 +649,7 @@ expression). Function *TheFunction = Builder.GetInsertBlock()->getParent(); BasicBlock *PreheaderBB = Builder.GetInsertBlock(); BasicBlock *LoopBB = - BasicBlock::Create(LLVMContext, "loop", TheFunction); + BasicBlock::Create(TheContext, "loop", TheFunction); // Insert an explicit fall through from the current block to the LoopBB. Builder.CreateBr(LoopBB); @@ -642,7 +666,7 @@ the two blocks. Builder.SetInsertPoint(LoopBB); // Start the PHI node with an entry for Start. - PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(LLVMContext), + PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, VarName.c_str()); Variable->addIncoming(StartVal, PreheaderBB); @@ -693,7 +717,7 @@ table. return nullptr; } else { // If not specified, use 1.0. - StepVal = ConstantFP::get(LLVMContext, APFloat(1.0)); + StepVal = ConstantFP::get(TheContext, APFloat(1.0)); } Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar"); @@ -710,9 +734,9 @@ iteration of the loop. if (!EndCond) return nullptr; - // Convert condition to a bool by comparing equal to 0.0. + // Convert condition to a bool by comparing non-equal to 0.0. EndCond = Builder.CreateFCmpONE( - EndCond, ConstantFP::get(LLVMContext, APFloat(0.0)), "loopcond"); + EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); Finally, we evaluate the exit value of the loop, to determine whether the loop should exit. This mirrors the condition evaluation for the @@ -723,7 +747,7 @@ if/then/else statement. // Create the "after loop" block and insert it. 
BasicBlock *LoopEndBB = Builder.GetInsertBlock(); BasicBlock *AfterBB = - BasicBlock::Create(LLVMContext, "afterloop", TheFunction); + BasicBlock::Create(TheContext, "afterloop", TheFunction); // Insert the conditional branch into the end of LoopEndBB. Builder.CreateCondBr(EndCond, LoopBB, AfterBB); @@ -751,7 +775,7 @@ insertion position to it. NamedValues.erase(VarName); // for expr always returns 0.0. - return Constant::getNullValue(Type::getDoubleTy(LLVMContext)); + return Constant::getNullValue(Type::getDoubleTy(TheContext)); } The final code handles various cleanups: now that we have the "NextVar" @@ -772,7 +796,7 @@ Full Code Listing ================= Here is the complete code listing for our running example, enhanced with -the if/then/else and for expressions.. To build this example, use: +the if/then/else and for expressions. To build this example, use: .. code-block:: bash diff --git a/docs/tutorial/LangImpl06.rst b/docs/tutorial/LangImpl06.rst index f6d2bd943ef7d6984136d9419ced4f07718a2700..c1035bce8559359a5c969a54a4effe7e1f879c79 100644 --- a/docs/tutorial/LangImpl06.rst +++ b/docs/tutorial/LangImpl06.rst @@ -31,7 +31,7 @@ User-defined Operators: the Idea ================================ The "operator overloading" that we will add to Kaleidoscope is more -general than languages like C++. In C++, you are only allowed to +general than in languages like C++. In C++, you are only allowed to redefine existing operators: you can't programmatically change the grammar, introduce new operators, change precedence levels, etc. In this chapter, we will add this capability to Kaleidoscope, which will let the @@ -41,8 +41,8 @@ The point of going into user-defined operators in a tutorial like this is to show the power and flexibility of using a hand-written parser. Thus far, the parser we have been implementing uses recursive descent for most parts of the grammar and operator precedence parsing for the -expressions. See `Chapter 2 `_ for details. 
Without -using operator precedence parsing, it would be very difficult to allow +expressions. See `Chapter 2 `_ for details. By +using operator precedence parsing, it is very easy to allow the programmer to introduce new operators into the grammar: the grammar is dynamically extensible as the JIT runs. @@ -143,17 +143,18 @@ this: : Name(name), Args(std::move(Args)), IsOperator(IsOperator), Precedence(Prec) {} + Function *codegen(); + const std::string &getName() const { return Name; } + bool isUnaryOp() const { return IsOperator && Args.size() == 1; } bool isBinaryOp() const { return IsOperator && Args.size() == 2; } char getOperatorName() const { assert(isUnaryOp() || isBinaryOp()); - return Name[Name.size()-1]; + return Name[Name.size() - 1]; } unsigned getBinaryPrecedence() const { return Precedence; } - - Function *codegen(); }; Basically, in addition to knowing a name for the prototype, we now keep @@ -194,7 +195,7 @@ user-defined operator, we need to parse it: // Read the precedence if present. if (CurTok == tok_number) { if (NumVal < 1 || NumVal > 100) - return LogErrorP("Invalid precedecnce: must be 1..100"); + return LogErrorP("Invalid precedence: must be 1..100"); BinaryPrecedence = (unsigned)NumVal; getNextToken(); } @@ -225,7 +226,7 @@ This is all fairly straightforward parsing code, and we have already seen a lot of similar code in the past. One interesting part about the code above is the couple lines that set up ``FnName`` for binary operators. This builds names like "binary@" for a newly defined "@" -operator. This then takes advantage of the fact that symbol names in the +operator. It then takes advantage of the fact that symbol names in the LLVM symbol table are allowed to have any character in them, including embedded nul characters. 
@@ -251,7 +252,7 @@ default case for our existing binary operator node: case '<': L = Builder.CreateFCmpULT(L, R, "cmptmp"); // Convert bool 0/1 to double 0.0 or 1.0 - return Builder.CreateUIToFP(L, Type::getDoubleTy(LLVMContext), + return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp"); default: break; @@ -259,7 +260,7 @@ default case for our existing binary operator node: // If it wasn't a builtin binary operator, it must be a user defined one. Emit // a call to it. - Function *F = TheModule->getFunction(std::string("binary") + Op); + Function *F = getFunction(std::string("binary") + Op); assert(F && "binary operator not found!"); Value *Ops[2] = { L, R }; @@ -277,22 +278,21 @@ The final piece of code we are missing, is a bit of top-level magic: .. code-block:: c++ Function *FunctionAST::codegen() { - NamedValues.clear(); - - Function *TheFunction = Proto->codegen(); + // Transfer ownership of the prototype to the FunctionProtos map, but keep a + // reference to it for use below. + auto &P = *Proto; + FunctionProtos[Proto->getName()] = std::move(Proto); + Function *TheFunction = getFunction(P.getName()); if (!TheFunction) return nullptr; // If this is an operator, install it. - if (Proto->isBinaryOp()) - BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence(); + if (P.isBinaryOp()) + BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); // Create a new basic block to start insertion into. - BasicBlock *BB = BasicBlock::Create(LLVMContext, "entry", TheFunction); - Builder.SetInsertPoint(BB); - - if (Value *RetVal = Body->codegen()) { - ... + BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction); + ... Basically, before codegening a function, if it is a user-defined operator, we register it in the precedence table. 
This allows the binary @@ -323,7 +323,8 @@ that, we need an AST node: public: UnaryExprAST(char Opcode, std::unique_ptr Operand) : Opcode(Opcode), Operand(std::move(Operand)) {} - virtual Value *codegen(); + + Value *codegen() override; }; This AST node is very simple and obvious by now. It directly mirrors the @@ -345,7 +346,7 @@ simple: we'll add a new function to do it: int Opc = CurTok; getNextToken(); if (auto Operand = ParseUnary()) - return llvm::unique_ptr(Opc, std::move(Operand)); + return llvm::make_unique(Opc, std::move(Operand)); return nullptr; } @@ -433,7 +434,7 @@ unary operators. It looks like this: if (!OperandV) return nullptr; - Function *F = TheModule->getFunction(std::string("unary")+Opcode); + Function *F = getFunction(std::string("unary") + Opcode); if (!F) return LogErrorV("Unknown unary operator"); @@ -461,7 +462,7 @@ newline): declare double @printd(double) ready> def binary : 1 (x y) 0; # Low-precedence operator that ignores operands. - .. + ... ready> printd(123) : printd(456) : printd(789); 123.000000 456.000000 @@ -518,10 +519,9 @@ denser the character: :: - ready> - - extern putchard(char) - def printdensity(d) + ready> extern putchard(char); + ... + ready> def printdensity(d) if d > 8 then putchard(32) # ' ' else if d > 4 then @@ -538,9 +538,9 @@ denser the character: Evaluated to 0.000000 Based on these simple primitive operations, we can start to define more -interesting things. For example, here's a little function that solves -for the number of iterations it takes a function in the complex plane to -converge: +interesting things. For example, here's a little function that determines +the number of iterations it takes for a certain function in the complex +plane to diverge: :: @@ -742,7 +742,7 @@ Full Code Listing ================= Here is the complete code listing for our running example, enhanced with -the if/then/else and for expressions.. To build this example, use: +the support for user-defined operators. 
To build this example, use: .. code-block:: bash diff --git a/docs/tutorial/LangImpl07.rst b/docs/tutorial/LangImpl07.rst index 4d86ecad38aaa342fc0f67dba6c31788842f0c4e..582645f449b4156cb6e20e81bcb012d0d03b7a06 100644 --- a/docs/tutorial/LangImpl07.rst +++ b/docs/tutorial/LangImpl07.rst @@ -327,7 +327,7 @@ to update: static std::map NamedValues; -Also, since we will need to create these alloca's, we'll use a helper +Also, since we will need to create these allocas, we'll use a helper function that ensures that the allocas are created in the entry block of the function: @@ -339,7 +339,7 @@ the function: const std::string &VarName) { IRBuilder<> TmpB(&TheFunction->getEntryBlock(), TheFunction->getEntryBlock().begin()); - return TmpB.CreateAlloca(Type::getDoubleTy(LLVMContext), 0, + return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), 0, VarName.c_str()); } @@ -348,7 +348,7 @@ the first instruction (.begin()) of the entry block. It then creates an alloca with the expected name and returns it. Because all values in Kaleidoscope are doubles, there is no need to pass in a type to use. -With this in place, the first functionality change we want to make is to +With this in place, the first functionality change we want to make belongs to variable references. In our new scheme, variables live on the stack, so code generating a reference to them actually needs to produce a load from the stack slot: @@ -377,7 +377,7 @@ the unabridged code): // Create an alloca for the variable in the entry block. AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); - // Emit the start code first, without 'variable' in scope. + // Emit the start code first, without 'variable' in scope. Value *StartVal = Start->codegen(); if (!StartVal) return nullptr; @@ -408,21 +408,25 @@ them. The code for this is also pretty simple: .. 
code-block:: c++ - /// CreateArgumentAllocas - Create an alloca for each argument and register the - /// argument in the symbol table so that references to it will succeed. - void PrototypeAST::CreateArgumentAllocas(Function *F) { - Function::arg_iterator AI = F->arg_begin(); - for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) { + Function *FunctionAST::codegen() { + ... + Builder.SetInsertPoint(BB); + + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + for (auto &Arg : TheFunction->args()) { // Create an alloca for this variable. - AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); // Store the initial value into the alloca. - Builder.CreateStore(AI, Alloca); + Builder.CreateStore(&Arg, Alloca); // Add arguments to variable symbol table. - NamedValues[Args[Idx]] = Alloca; + NamedValues[Arg.getName()] = Alloca; } - } + + if (Value *RetVal = Body->codegen()) { + ... For each argument, we make an alloca, store the input value to the function into the alloca, and register the alloca as the memory location @@ -434,15 +438,13 @@ get good codegen once again: .. code-block:: c++ - // Set up the optimizer pipeline. Start with registering info about how the - // target lays out data structures. - OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout())); // Promote allocas to registers. - OurFPM.add(createPromoteMemoryToRegisterPass()); + TheFPM->add(createPromoteMemoryToRegisterPass()); // Do simple "peephole" optimizations and bit-twiddling optzns. - OurFPM.add(createInstructionCombiningPass()); + TheFPM->add(createInstructionCombiningPass()); // Reassociate expressions. - OurFPM.add(createReassociatePass()); + TheFPM->add(createReassociatePass()); + ... It is interesting to see what the code looks like before and after the mem2reg optimization runs. 
For example, this is the before/after code @@ -454,7 +456,7 @@ for our recursive fib function. Before the optimization: entry: %x1 = alloca double store double %x, double* %x1 - %x2 = load double* %x1 + %x2 = load double, double* %x1 %cmptmp = fcmp ult double %x2, 3.000000e+00 %booltmp = uitofp i1 %cmptmp to double %ifcond = fcmp one double %booltmp, 0.000000e+00 @@ -464,10 +466,10 @@ for our recursive fib function. Before the optimization: br label %ifcont else: ; preds = %entry - %x3 = load double* %x1 + %x3 = load double, double* %x1 %subtmp = fsub double %x3, 1.000000e+00 %calltmp = call double @fib(double %subtmp) - %x4 = load double* %x1 + %x4 = load double, double* %x1 %subtmp5 = fsub double %x4, 2.000000e+00 %calltmp6 = call double @fib(double %subtmp5) %addtmp = fadd double %calltmp, %calltmp6 @@ -677,10 +679,10 @@ var/in, it looks like this: public: VarExprAST(std::vector>> VarNames, - std::unique_ptr body) - : VarNames(std::move(VarNames)), Body(std::move(Body)) {} + std::unique_ptr Body) + : VarNames(std::move(VarNames)), Body(std::move(Body)) {} - virtual Value *codegen(); + Value *codegen() override; }; var/in allows a list of names to be defined all at once, and each name @@ -812,7 +814,7 @@ previous value that we replace in OldBindings. if (!InitVal) return nullptr; } else { // If not specified, use 0.0. - InitVal = ConstantFP::get(LLVMContext, APFloat(0.0)); + InitVal = ConstantFP::get(TheContext, APFloat(0.0)); } AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); diff --git a/docs/tutorial/LangImpl09.rst b/docs/tutorial/LangImpl09.rst index 0053960756d29489c25f8e329a49ed5d39714006..fe5a95a5769ec7210b7fc3b8e684dcc069cc8b35 100644 --- a/docs/tutorial/LangImpl09.rst +++ b/docs/tutorial/LangImpl09.rst @@ -18,7 +18,7 @@ Source level debugging uses formatted data that helps a debugger translate from binary and the state of the machine back to the source that the programmer wrote. In LLVM we generally use a format called `DWARF `_. 
DWARF is a compact encoding -that represents types, source locations, and variable locations. +that represents types, source locations, and variable locations. The short summary of this chapter is that we'll go through the various things you have to add to a programming language to @@ -94,14 +94,14 @@ Then we're going to remove the command line code wherever it exists: return; @@ -1184,7 +1183,6 @@ int main() { BinopPrecedence['*'] = 40; // highest. - + // Prime the first token. - fprintf(stderr, "ready> "); getNextToken(); - + Lastly we're going to disable all of the optimization passes and the JIT so that the only thing that happens after we're done parsing and generating -code is that the llvm IR goes to standard error: +code is that the LLVM IR goes to standard error: .. code-block:: udiff @@ -140,7 +140,7 @@ code is that the llvm IR goes to standard error: - + #endif OurFPM.doInitialization(); - + // Set the global so the code gen can use this. This relatively small set of changes get us to the point that we can compile @@ -166,8 +166,8 @@ DWARF Emission Setup Similar to the ``IRBuilder`` class we have a `DIBuilder `_ class -that helps in constructing debug metadata for an llvm IR file. It -corresponds 1:1 similarly to ``IRBuilder`` and llvm IR, but with nicer names. +that helps in constructing debug metadata for an LLVM IR file. It +corresponds 1:1 similarly to ``IRBuilder`` and LLVM IR, but with nicer names. Using it does require that you be more familiar with DWARF terminology than you needed to be with ``IRBuilder`` and ``Instruction`` names, but if you read through the general documentation on the @@ -194,7 +194,7 @@ expressions: } KSDbgInfo; DIType *DebugInfo::getDoubleTy() { - if (DblTy.isValid()) + if (DblTy) return DblTy; DblTy = DBuilder->createBasicType("double", 64, 64, dwarf::DW_ATE_float); @@ -214,7 +214,7 @@ There are a couple of things to note here. 
First, while we're producing a compile unit for a language called Kaleidoscope we used the language constant for C. This is because a debugger wouldn't necessarily understand the calling conventions or default ABI for a language it doesn't recognize -and we follow the C ABI in our llvm code generation so it's the closest +and we follow the C ABI in our LLVM code generation so it's the closest thing to accurate. This ensures we can actually call functions from the debugger and have them execute. Secondly, you'll see the "fib.ks" in the call to ``createCompileUnit``. This is a default hard coded value since @@ -259,10 +259,11 @@ information) and construct our function definition: unsigned LineNo = 0; unsigned ScopeLine = 0; DISubprogram *SP = DBuilder->createFunction( - FContext, Name, StringRef(), Unit, LineNo, - CreateFunctionType(Args.size(), Unit), false /* internal linkage */, - true /* definition */, ScopeLine, DINode::FlagPrototyped, false); - F->setSubprogram(SP); + FContext, P.getName(), StringRef(), Unit, LineNo, + CreateFunctionType(TheFunction->arg_size(), Unit), + false /* internal linkage */, true /* definition */, ScopeLine, + DINode::FlagPrototyped, false); + TheFunction->setSubprogram(SP); and we now have an DISubprogram that contains a reference to all of our metadata for the function. @@ -326,10 +327,9 @@ that we pass down through when we create a new expression: giving us locations for each of our expressions and variables. -From this we can make sure to tell ``DIBuilder`` when we're at a new source -location so it can use that when we generate the rest of our code and make -sure that each instruction has source location information. We do this -by constructing another small function: +To make sure that every instruction gets proper source location information, +we have to tell ``Builder`` whenever we're at a new source location. +We use a small helper function for this: .. 
code-block:: c++ @@ -343,40 +343,23 @@ by constructing another small function: DebugLoc::get(AST->getLine(), AST->getCol(), Scope)); } -that both tells the main ``IRBuilder`` where we are, but also what scope -we're in. Since we've just created a function above we can either be in -the main file scope (like when we created our function), or now we can be -in the function scope we just created. To represent this we create a stack -of scopes: +This both tells the main ``IRBuilder`` where we are, but also what scope +we're in. The scope can either be on compile-unit level or be the nearest +enclosing lexical block like the current function. +To represent this we create a stack of scopes: .. code-block:: c++ std::vector LexicalBlocks; - std::map FnScopeMap; - -and keep a map of each function to the scope that it represents (an -DISubprogram is also an DIScope). - -Then we make sure to: - -.. code-block:: c++ - - KSDbgInfo.emitLocation(this); -emit the location every time we start to generate code for a new AST, and -also: +and push the scope (function) to the top of the stack when we start +generating the code for each function: .. code-block:: c++ - KSDbgInfo.FnScopeMap[this] = SP; - -store the scope (function) when we create it and use it: - - KSDbgInfo.LexicalBlocks.push_back(&KSDbgInfo.FnScopeMap[Proto]); - -when we start generating the code for each function. + KSDbgInfo.LexicalBlocks.push_back(SP); -also, don't forget to pop the scope back off of your scope stack at the +Also, we must not forget to pop the scope back off of the scope stack at the end of the code generation for the function: .. code-block:: c++ @@ -385,6 +368,13 @@ end of the code generation for the function: // unconditionally. KSDbgInfo.LexicalBlocks.pop_back(); +Then we make sure to emit the location every time we start to generate code +for a new AST object: + +.. 
code-block:: c++ + + KSDbgInfo.emitLocation(this); + Variables ========= @@ -392,25 +382,37 @@ Now that we have functions, we need to be able to print out the variables we have in scope. Let's get our function arguments set up so we can get decent backtraces and see how our functions are being called. It isn't a lot of code, and we generally handle it when we're creating the -argument allocas in ``PrototypeAST::CreateArgumentAllocas``. +argument allocas in ``FunctionAST::codegen``. .. code-block:: c++ - DIScope *Scope = KSDbgInfo.LexicalBlocks.back(); - DIFile *Unit = DBuilder->createFile(KSDbgInfo.TheCU.getFilename(), - KSDbgInfo.TheCU.getDirectory()); - DILocalVariable D = DBuilder->createParameterVariable( - Scope, Args[Idx], Idx + 1, Unit, Line, KSDbgInfo.getDoubleTy(), true); + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + unsigned ArgIdx = 0; + for (auto &Arg : TheFunction->args()) { + // Create an alloca for this variable. + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); + + // Create a debug descriptor for the variable. + DILocalVariable *D = DBuilder->createParameterVariable( + SP, Arg.getName(), ++ArgIdx, Unit, LineNo, KSDbgInfo.getDoubleTy(), + true); + + DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(), + DebugLoc::get(LineNo, 0, SP), + Builder.GetInsertBlock()); + + // Store the initial value into the alloca. + Builder.CreateStore(&Arg, Alloca); + + // Add arguments to variable symbol table. + NamedValues[Arg.getName()] = Alloca; + } - DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(), - DebugLoc::get(Line, 0, Scope), - Builder.GetInsertBlock()); -Here we're doing a few things. First, we're grabbing our current scope -for the variable so we can say what range of code our variable is valid -through. 
Second, we're creating the variable, giving it the scope, +Here we're first creating the variable, giving it the scope (``SP``), the name, source location, type, and since it's an argument, the argument -index. Third, we create an ``lvm.dbg.declare`` call to indicate at the IR +index. Next, we create an ``llvm.dbg.declare`` call to indicate at the IR level that we've got a variable in an alloca (and it gives a starting location for the variable), and setting a source location for the beginning of the scope on the declare. @@ -420,7 +422,7 @@ assumptions based on how code and debug information was generated for them in the past. In this case we need to do a little bit of a hack to avoid generating line information for the function prologue so that the debugger knows to skip over those instructions when setting a breakpoint. So in -``FunctionAST::CodeGen`` we add a couple of lines: +``FunctionAST::CodeGen`` we add some more lines: .. code-block:: c++ @@ -434,7 +436,7 @@ body of the function: .. code-block:: c++ - KSDbgInfo.emitLocation(Body); + KSDbgInfo.emitLocation(Body.get()); With this we have enough debug information to set breakpoints in functions, print out argument variables, and call functions. Not too bad for just a diff --git a/docs/tutorial/OCamlLangImpl5.rst b/docs/tutorial/OCamlLangImpl5.rst index 3a135b23337339ce6665c6e41a11b7b95253be5f..6e17de4b2bde8d6d5fca60bb681ac8fef184e630 100644 --- a/docs/tutorial/OCamlLangImpl5.rst +++ b/docs/tutorial/OCamlLangImpl5.rst @@ -103,19 +103,7 @@ Parser Extensions for If/Then/Else Now that we have the relevant tokens coming from the lexer and we have the AST node to build, our parsing logic is relatively straightforward. -First we define a new parsing function: - -.. code-block:: ocaml - - let rec parse_primary = parser - ... - (* ifexpr ::= 'if' expr 'then' expr 'else' expr *) - | [< 'Token.If; c=parse_expr; - 'Token.Then ?? "expected 'then'"; t=parse_expr; - 'Token.Else ?? 
"expected 'else'"; e=parse_expr >] -> - Ast.If (c, t, e) - -Next we hook it up as a primary expression: +Next we add a new case for parsing a if-expression as a primary expression: .. code-block:: ocaml diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp index 91d813a6c3bb4d4b8188cc675ad61d404b5a917e..8af34d04701e32ef713e0af0509321fc0112c39e 100644 --- a/examples/BrainF/BrainF.cpp +++ b/examples/BrainF/BrainF.cpp @@ -74,18 +74,18 @@ void BrainF::header(LLVMContext& C) { //declare i32 @getchar() getchar_func = cast(module-> - getOrInsertFunction("getchar", IntegerType::getInt32Ty(C), NULL)); + getOrInsertFunction("getchar", IntegerType::getInt32Ty(C))); //declare i32 @putchar(i32) putchar_func = cast(module-> getOrInsertFunction("putchar", IntegerType::getInt32Ty(C), - IntegerType::getInt32Ty(C), NULL)); + IntegerType::getInt32Ty(C))); //Function header //define void @brainf() brainf_func = cast(module-> - getOrInsertFunction("brainf", Type::getVoidTy(C), NULL)); + getOrInsertFunction("brainf", Type::getVoidTy(C))); builder = new IRBuilder<>(BasicBlock::Create(C, label, brainf_func)); @@ -156,7 +156,7 @@ void BrainF::header(LLVMContext& C) { //declare i32 @puts(i8 *) Function *puts_func = cast(module-> getOrInsertFunction("puts", IntegerType::getInt32Ty(C), - PointerType::getUnqual(IntegerType::getInt8Ty(C)), NULL)); + PointerType::getUnqual(IntegerType::getInt8Ty(C)))); //brainf.aberror: aberrorbb = BasicBlock::Create(C, label, brainf_func); diff --git a/examples/BrainF/BrainFDriver.cpp b/examples/BrainF/BrainFDriver.cpp index d704506d2442733d823bfa9e4d8cac0d63e92226..65f8033a7e27ecd44b0f04331fd074edd41c69f6 100644 --- a/examples/BrainF/BrainFDriver.cpp +++ b/examples/BrainF/BrainFDriver.cpp @@ -77,7 +77,7 @@ void addMainFunction(Module *mod) { getOrInsertFunction("main", IntegerType::getInt32Ty(mod->getContext()), IntegerType::getInt32Ty(mod->getContext()), PointerType::getUnqual(PointerType::getUnqual( - 
IntegerType::getInt8Ty(mod->getContext()))), NULL)); + IntegerType::getInt8Ty(mod->getContext()))))); { Function::arg_iterator args = main_func->arg_begin(); Value *arg_0 = &*args++; diff --git a/examples/Fibonacci/fibonacci.cpp b/examples/Fibonacci/fibonacci.cpp index 16e52bf04099053916b76ba2add86b9381dcff87..662cb01dd37e9de96651d5dc71627d13047ed030 100644 --- a/examples/Fibonacci/fibonacci.cpp +++ b/examples/Fibonacci/fibonacci.cpp @@ -54,8 +54,7 @@ static Function *CreateFibFunction(Module *M, LLVMContext &Context) { // to return an int and take an int parameter. Function *FibF = cast(M->getOrInsertFunction("fib", Type::getInt32Ty(Context), - Type::getInt32Ty(Context), - nullptr)); + Type::getInt32Ty(Context))); // Add a basic block to the function. BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", FibF); diff --git a/examples/HowToUseJIT/HowToUseJIT.cpp b/examples/HowToUseJIT/HowToUseJIT.cpp index 0050d27b45d7fbbbca446fab128c0a14c9d4a7d3..f141fa5a7f541037a42643e309fa32e005f32ae3 100644 --- a/examples/HowToUseJIT/HowToUseJIT.cpp +++ b/examples/HowToUseJIT/HowToUseJIT.cpp @@ -69,11 +69,9 @@ int main() { // Create the add1 function entry and insert this entry into module M. The // function will have a return type of "int" and take an argument of "int". - // The '0' terminates the list of argument types. Function *Add1F = cast(M->getOrInsertFunction("add1", Type::getInt32Ty(Context), - Type::getInt32Ty(Context), - nullptr)); + Type::getInt32Ty(Context))); // Add a basic block to the function. As before, it automatically inserts // because of the last argument. @@ -102,8 +100,7 @@ int main() { // Now we're going to create function `foo', which returns an int and takes no // arguments. Function *FooF = - cast(M->getOrInsertFunction("foo", Type::getInt32Ty(Context), - nullptr)); + cast(M->getOrInsertFunction("foo", Type::getInt32Ty(Context))); // Add a basic block to the FooF function. 
BB = BasicBlock::Create(Context, "EntryBlock", FooF); diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h index 22716b2b5215514f9cd4622f95e2226ad24ba791..a14fd1dc20eca72df000180fdde53587bc923b15 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h @@ -22,7 +22,7 @@ #include "llvm/ExecutionEngine/Orc/CompileUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" -#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Mangler.h" #include "llvm/Support/DynamicLibrary.h" @@ -40,7 +40,7 @@ class KaleidoscopeJIT { private: std::unique_ptr TM; const DataLayout DL; - ObjectLinkingLayer<> ObjectLayer; + RTDyldObjectLinkingLayer<> ObjectLayer; IRCompileLayer CompileLayer; public: diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h index 91d903029a00a9059f2091e48abb91a9f40032eb..2039be4571a59956b7225518bd98dcdde1c5644c 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h @@ -23,7 +23,7 @@ #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/IRTransformLayer.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" -#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Mangler.h" @@ -44,7 +44,7 @@ class KaleidoscopeJIT { private: std::unique_ptr TM; const DataLayout DL; - ObjectLinkingLayer<> ObjectLayer; + RTDyldObjectLinkingLayer<> ObjectLayer; IRCompileLayer 
CompileLayer; typedef std::function(std::unique_ptr)> diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h index eefe6a551fa5264a6155e3c20b670f01e698a7d1..d22d41855072ca62bb03e9dea03f574e1f720d79 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h @@ -24,7 +24,7 @@ #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/IRTransformLayer.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" -#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Mangler.h" @@ -46,7 +46,7 @@ class KaleidoscopeJIT { private: std::unique_ptr TM; const DataLayout DL; - ObjectLinkingLayer<> ObjectLayer; + RTDyldObjectLinkingLayer<> ObjectLayer; IRCompileLayer CompileLayer; typedef std::function(std::unique_ptr)> diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h index 527d4be09f0f97f8e4763f1a4b4cf2f2d2db660e..e0a78410f7134953450ab3978ec6a439b3a942ee 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h @@ -24,7 +24,7 @@ #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/IRTransformLayer.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" -#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Mangler.h" @@ -73,7 +73,7 @@ class KaleidoscopeJIT { private: std::unique_ptr TM; const DataLayout DL; - ObjectLinkingLayer<> ObjectLayer; + RTDyldObjectLinkingLayer<> 
ObjectLayer; IRCompileLayer CompileLayer; typedef std::function(std::unique_ptr)> diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h index d1ef3c9549ffa59214e93fd057a8a34e353cf8f7..70a896fe8f007b908826aaa8ca351133803e47fe 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h @@ -26,7 +26,7 @@ #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/IRTransformLayer.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" -#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LegacyPassManager.h" @@ -79,7 +79,7 @@ class KaleidoscopeJIT { private: std::unique_ptr TM; const DataLayout DL; - ObjectLinkingLayer<> ObjectLayer; + RTDyldObjectLinkingLayer<> ObjectLayer; IRCompileLayer CompileLayer; typedef std::function(std::unique_ptr)> diff --git a/examples/Kaleidoscope/Chapter2/toy.cpp b/examples/Kaleidoscope/Chapter2/toy.cpp index 8357c5b63fb70bc99426a7b820a953373c183775..4dc917e3f06f3293bac74f00c35913c877e3c9b2 100644 --- a/examples/Kaleidoscope/Chapter2/toy.cpp +++ b/examples/Kaleidoscope/Chapter2/toy.cpp @@ -140,6 +140,8 @@ class PrototypeAST { public: PrototypeAST(const std::string &Name, std::vector Args) : Name(Name), Args(std::move(Args)) {} + + const std::string &getName() const { return Name; } }; /// FunctionAST - This class represents a function definition itself. 
diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp index 3bd077b6e3886f0b3aed16e1e87af45bc2306f64..cf7d6c2bee04e5b0b532d19c2c9d57b0058f63d4 100644 --- a/examples/Kaleidoscope/Chapter4/toy.cpp +++ b/examples/Kaleidoscope/Chapter4/toy.cpp @@ -650,14 +650,20 @@ static void MainLoop() { // "Library" functions that can be "extern'd" from user code. //===----------------------------------------------------------------------===// +#ifdef LLVM_ON_WIN32 +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif + /// putchard - putchar that takes a double and returns 0. -extern "C" double putchard(double X) { +extern "C" DLLEXPORT double putchard(double X) { fputc((char)X, stderr); return 0; } /// printd - printf that takes a double prints it as "%f\n", returning 0. -extern "C" double printd(double X) { +extern "C" DLLEXPORT double printd(double X) { fprintf(stderr, "%f\n", X); return 0; } diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp index 795f49c847ea71a421b7895efd4fca976a5b9760..6852973bae40020d0983055ee043cc6763327709 100644 --- a/examples/Kaleidoscope/Chapter5/toy.cpp +++ b/examples/Kaleidoscope/Chapter5/toy.cpp @@ -622,7 +622,7 @@ Value *IfExprAST::codegen() { if (!CondV) return nullptr; - // Convert condition to a bool by comparing equal to 0.0. + // Convert condition to a bool by comparing non-equal to 0.0. CondV = Builder.CreateFCmpONE( CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); @@ -736,7 +736,7 @@ Value *ForExprAST::codegen() { if (!EndCond) return nullptr; - // Convert condition to a bool by comparing equal to 0.0. + // Convert condition to a bool by comparing non-equal to 0.0. EndCond = Builder.CreateFCmpONE( EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); @@ -924,14 +924,20 @@ static void MainLoop() { // "Library" functions that can be "extern'd" from user code. 
//===----------------------------------------------------------------------===// +#ifdef LLVM_ON_WIN32 +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif + /// putchard - putchar that takes a double and returns 0. -extern "C" double putchard(double X) { +extern "C" DLLEXPORT double putchard(double X) { fputc((char)X, stderr); return 0; } /// printd - printf that takes a double prints it as "%f\n", returning 0. -extern "C" double printd(double X) { +extern "C" DLLEXPORT double printd(double X) { fprintf(stderr, "%f\n", X); return 0; } diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp index 19e25d37dcdedcc062e2b206de188137ea638b2a..1e0ddca29b61f197555e25b971fd6d4ee4f80af0 100644 --- a/examples/Kaleidoscope/Chapter6/toy.cpp +++ b/examples/Kaleidoscope/Chapter6/toy.cpp @@ -567,7 +567,7 @@ static std::unique_ptr ParsePrototype() { // Read the precedence if present. if (CurTok == tok_number) { if (NumVal < 1 || NumVal > 100) - return LogErrorP("Invalid precedecnce: must be 1..100"); + return LogErrorP("Invalid precedence: must be 1..100"); BinaryPrecedence = (unsigned)NumVal; getNextToken(); } @@ -734,7 +734,7 @@ Value *IfExprAST::codegen() { if (!CondV) return nullptr; - // Convert condition to a bool by comparing equal to 0.0. + // Convert condition to a bool by comparing non-equal to 0.0. CondV = Builder.CreateFCmpONE( CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); @@ -848,7 +848,7 @@ Value *ForExprAST::codegen() { if (!EndCond) return nullptr; - // Convert condition to a bool by comparing equal to 0.0. + // Convert condition to a bool by comparing non-equal to 0.0. EndCond = Builder.CreateFCmpONE( EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); @@ -1043,14 +1043,20 @@ static void MainLoop() { // "Library" functions that can be "extern'd" from user code. 
//===----------------------------------------------------------------------===// +#ifdef LLVM_ON_WIN32 +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif + /// putchard - putchar that takes a double and returns 0. -extern "C" double putchard(double X) { +extern "C" DLLEXPORT double putchard(double X) { fputc((char)X, stderr); return 0; } /// printd - printf that takes a double prints it as "%f\n", returning 0. -extern "C" double printd(double X) { +extern "C" DLLEXPORT double printd(double X) { fprintf(stderr, "%f\n", X); return 0; } diff --git a/examples/Kaleidoscope/Chapter7/CMakeLists.txt b/examples/Kaleidoscope/Chapter7/CMakeLists.txt index e67d7928efe79539522bb8e955de6c522469aa31..69e78be6a620757daaf4bb13227a892b04cca4e4 100644 --- a/examples/Kaleidoscope/Chapter7/CMakeLists.txt +++ b/examples/Kaleidoscope/Chapter7/CMakeLists.txt @@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS RuntimeDyld ScalarOpts Support + TransformUtils native ) diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp index 7e723ba0397bd1cae4e737977ec2d7b057c71fdb..2f8cb682a847141dfc6fa348a25af61107b71d17 100644 --- a/examples/Kaleidoscope/Chapter7/toy.cpp +++ b/examples/Kaleidoscope/Chapter7/toy.cpp @@ -639,7 +639,7 @@ static std::unique_ptr ParsePrototype() { // Read the precedence if present. if (CurTok == tok_number) { if (NumVal < 1 || NumVal > 100) - return LogErrorP("Invalid precedecnce: must be 1..100"); + return LogErrorP("Invalid precedence: must be 1..100"); BinaryPrecedence = (unsigned)NumVal; getNextToken(); } @@ -840,7 +840,7 @@ Value *IfExprAST::codegen() { if (!CondV) return nullptr; - // Convert condition to a bool by comparing equal to 0.0. + // Convert condition to a bool by comparing non-equal to 0.0. 
CondV = Builder.CreateFCmpONE( CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); @@ -963,7 +963,7 @@ Value *ForExprAST::codegen() { Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); Builder.CreateStore(NextVar, Alloca); - // Convert condition to a bool by comparing equal to 0.0. + // Convert condition to a bool by comparing non-equal to 0.0. EndCond = Builder.CreateFCmpONE( EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); @@ -1115,6 +1115,8 @@ static void InitializeModuleAndPassManager() { // Create a new pass manager attached to it. TheFPM = llvm::make_unique(TheModule.get()); + // Promote allocas to registers. + TheFPM->add(createPromoteMemoryToRegisterPass()); // Do simple "peephole" optimizations and bit-twiddling optzns. TheFPM->add(createInstructionCombiningPass()); // Reassociate expressions. @@ -1210,14 +1212,20 @@ static void MainLoop() { // "Library" functions that can be "extern'd" from user code. //===----------------------------------------------------------------------===// +#ifdef LLVM_ON_WIN32 +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif + /// putchard - putchar that takes a double and returns 0. -extern "C" double putchard(double X) { +extern "C" DLLEXPORT double putchard(double X) { fputc((char)X, stderr); return 0; } /// printd - printf that takes a double prints it as "%f\n", returning 0. -extern "C" double printd(double X) { +extern "C" DLLEXPORT double printd(double X) { fprintf(stderr, "%f\n", X); return 0; } diff --git a/examples/Kaleidoscope/Chapter8/toy.cpp b/examples/Kaleidoscope/Chapter8/toy.cpp index 354380adfc4a8c6bb7cda763d5483bdf57885d96..cdf650973b86022fc65162ad053ca3345312ee1f 100644 --- a/examples/Kaleidoscope/Chapter8/toy.cpp +++ b/examples/Kaleidoscope/Chapter8/toy.cpp @@ -642,7 +642,7 @@ static std::unique_ptr ParsePrototype() { // Read the precedence if present. 
if (CurTok == tok_number) { if (NumVal < 1 || NumVal > 100) - return LogErrorP("Invalid precedecnce: must be 1..100"); + return LogErrorP("Invalid precedence: must be 1..100"); BinaryPrecedence = (unsigned)NumVal; getNextToken(); } @@ -841,7 +841,7 @@ Value *IfExprAST::codegen() { if (!CondV) return nullptr; - // Convert condition to a bool by comparing equal to 0.0. + // Convert condition to a bool by comparing non-equal to 0.0. CondV = Builder.CreateFCmpONE( CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); @@ -964,7 +964,7 @@ Value *ForExprAST::codegen() { Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); Builder.CreateStore(NextVar, Alloca); - // Convert condition to a bool by comparing equal to 0.0. + // Convert condition to a bool by comparing non-equal to 0.0. EndCond = Builder.CreateFCmpONE( EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); @@ -1173,14 +1173,20 @@ static void MainLoop() { // "Library" functions that can be "extern'd" from user code. //===----------------------------------------------------------------------===// +#ifdef LLVM_ON_WIN32 +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif + /// putchard - putchar that takes a double and returns 0. -extern "C" double putchard(double X) { +extern "C" DLLEXPORT double putchard(double X) { fputc((char)X, stderr); return 0; } /// printd - printf that takes a double prints it as "%f\n", returning 0. -extern "C" double printd(double X) { +extern "C" DLLEXPORT double printd(double X) { fprintf(stderr, "%f\n", X); return 0; } diff --git a/examples/Kaleidoscope/Chapter9/toy.cpp b/examples/Kaleidoscope/Chapter9/toy.cpp index aa609933fc9d147bcec5d086f177c5257aa0cec7..1b13e45ec4601f6b3677e1772fc033ef0640b3bb 100644 --- a/examples/Kaleidoscope/Chapter9/toy.cpp +++ b/examples/Kaleidoscope/Chapter9/toy.cpp @@ -756,7 +756,7 @@ static std::unique_ptr ParsePrototype() { // Read the precedence if present. 
if (CurTok == tok_number) { if (NumVal < 1 || NumVal > 100) - return LogErrorP("Invalid precedecnce: must be 1..100"); + return LogErrorP("Invalid precedence: must be 1..100"); BinaryPrecedence = (unsigned)NumVal; getNextToken(); } @@ -1004,7 +1004,7 @@ Value *IfExprAST::codegen() { if (!CondV) return nullptr; - // Convert condition to a bool by comparing equal to 0.0. + // Convert condition to a bool by comparing non-equal to 0.0. CondV = Builder.CreateFCmpONE( CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); @@ -1129,7 +1129,7 @@ Value *ForExprAST::codegen() { Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); Builder.CreateStore(NextVar, Alloca); - // Convert condition to a bool by comparing equal to 0.0. + // Convert condition to a bool by comparing non-equal to 0.0. EndCond = Builder.CreateFCmpONE( EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); @@ -1379,14 +1379,20 @@ static void MainLoop() { // "Library" functions that can be "extern'd" from user code. //===----------------------------------------------------------------------===// +#ifdef LLVM_ON_WIN32 +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif + /// putchard - putchar that takes a double and returns 0. -extern "C" double putchard(double X) { +extern "C" DLLEXPORT double putchard(double X) { fputc((char)X, stderr); return 0; } /// printd - printf that takes a double prints it as "%f\n", returning 0. 
-extern "C" double printd(double X) { +extern "C" DLLEXPORT double printd(double X) { fprintf(stderr, "%f\n", X); return 0; } diff --git a/examples/Kaleidoscope/include/KaleidoscopeJIT.h b/examples/Kaleidoscope/include/KaleidoscopeJIT.h index 6130107bdd942e28e26f1242e3be172672d00601..1dca39deba3c39e052796b2b61d1c3efa3f81af7 100644 --- a/examples/Kaleidoscope/include/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/include/KaleidoscopeJIT.h @@ -24,7 +24,7 @@ #include "llvm/ExecutionEngine/Orc/CompileUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" -#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Mangler.h" #include "llvm/Support/DynamicLibrary.h" @@ -40,7 +40,7 @@ namespace orc { class KaleidoscopeJIT { public: - typedef ObjectLinkingLayer<> ObjLayerT; + typedef RTDyldObjectLinkingLayer<> ObjLayerT; typedef IRCompileLayer CompileLayerT; typedef CompileLayerT::ModuleSetHandleT ModuleHandleT; @@ -97,17 +97,40 @@ private: } JITSymbol findMangledSymbol(const std::string &Name) { +#ifdef LLVM_ON_WIN32 + // The symbol lookup of ObjectLinkingLayer uses the SymbolRef::SF_Exported + // flag to decide whether a symbol will be visible or not, when we call + // IRCompileLayer::findSymbolIn with ExportedSymbolsOnly set to true. + // + // But for Windows COFF objects, this flag is currently never set. + // For a potential solution see: https://reviews.llvm.org/rL258665 + // For now, we allow non-exported symbols on Windows as a workaround. + const bool ExportedSymbolsOnly = false; +#else + const bool ExportedSymbolsOnly = true; +#endif + // Search modules in reverse order: from last added to first added. // This is the opposite of the usual search order for dlsym, but makes more // sense in a REPL where we want to bind to the newest available definition. 
for (auto H : make_range(ModuleHandles.rbegin(), ModuleHandles.rend())) - if (auto Sym = CompileLayer.findSymbolIn(H, Name, true)) + if (auto Sym = CompileLayer.findSymbolIn(H, Name, ExportedSymbolsOnly)) return Sym; // If we can't find the symbol in the JIT, try looking in the host process. if (auto SymAddr = RTDyldMemoryManager::getSymbolAddressInProcess(Name)) return JITSymbol(SymAddr, JITSymbolFlags::Exported); +#ifdef LLVM_ON_WIN32 + // For Windows retry without "_" at begining, as RTDyldMemoryManager uses + // GetProcAddress and standard libraries like msvcrt.dll use names + // with and without "_" (for example "_itoa" but "sin"). + if (Name.length() > 2 && Name[0] == '_') + if (auto SymAddr = + RTDyldMemoryManager::getSymbolAddressInProcess(Name.substr(1))) + return JITSymbol(SymAddr, JITSymbolFlags::Exported); +#endif + return nullptr; } diff --git a/examples/ParallelJIT/CMakeLists.txt b/examples/ParallelJIT/CMakeLists.txt index e85b470f5036b8c5b9ade0b707e8b5cdd79475f0..deeee072b33caa3896a10b9550d1bb8ad1875979 100644 --- a/examples/ParallelJIT/CMakeLists.txt +++ b/examples/ParallelJIT/CMakeLists.txt @@ -11,4 +11,4 @@ add_llvm_example(ParallelJIT ParallelJIT.cpp ) -target_link_libraries(ParallelJIT ${PTHREAD_LIB}) +target_link_libraries(ParallelJIT ${LLVM_PTHREAD_LIB}) diff --git a/examples/ParallelJIT/ParallelJIT.cpp b/examples/ParallelJIT/ParallelJIT.cpp index 6fb8bd61982b5a141d305c2827d6badc88e9fb5a..f1932d2471cb88dcf190c290d0df9a94732d4ba0 100644 --- a/examples/ParallelJIT/ParallelJIT.cpp +++ b/examples/ParallelJIT/ParallelJIT.cpp @@ -54,8 +54,7 @@ static Function* createAdd1(Module *M) { Function *Add1F = cast(M->getOrInsertFunction("add1", Type::getInt32Ty(M->getContext()), - Type::getInt32Ty(M->getContext()), - nullptr)); + Type::getInt32Ty(M->getContext()))); // Add a basic block to the function. As before, it automatically inserts // because of the last argument. 
@@ -85,8 +84,7 @@ static Function *CreateFibFunction(Module *M) { Function *FibF = cast(M->getOrInsertFunction("fib", Type::getInt32Ty(M->getContext()), - Type::getInt32Ty(M->getContext()), - nullptr)); + Type::getInt32Ty(M->getContext()))); // Add a basic block to the function. BasicBlock *BB = BasicBlock::Create(M->getContext(), "EntryBlock", FibF); diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h index 8991e0904849cd84851eb538147acc4218638464..b9612b9cec0443dc1761b68096bd4e3bbea46654 100644 --- a/include/llvm-c/Transforms/Scalar.h +++ b/include/llvm-c/Transforms/Scalar.h @@ -44,6 +44,9 @@ void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM); /** See llvm::createCFGSimplificationPass function. */ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM); +/** See llvm::createLateCFGSimplificationPass function. */ +void LLVMAddLateCFGSimplificationPass(LLVMPassManagerRef PM); + /** See llvm::createDeadStoreEliminationPass function. */ void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM); diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h index c3af74cdedabed52bdd45dd4fe03d334d9beedfc..8d45b783204172e8e43c4845c499fe1ea8124dbc 100644 --- a/include/llvm-c/lto.h +++ b/include/llvm-c/lto.h @@ -551,7 +551,7 @@ lto_codegen_set_should_embed_uselists(lto_code_gen_t cg, lto_bool_t ShouldEmbedUselists); /** - * @} + * @} // endgoup LLVMCLTO * @defgroup LLVMCTLTO ThinLTO * @ingroup LLVMC * @@ -668,75 +668,6 @@ const char *thinlto_module_get_object_file(thinlto_code_gen_t cg, extern lto_bool_t thinlto_codegen_set_pic_model(thinlto_code_gen_t cg, lto_codegen_model); -/** - * @} - * @defgroup LLVMCTLTO_CACHING ThinLTO Cache Control - * @ingroup LLVMCTLTO - * - * These entry points control the ThinLTO cache. The cache is intended to - * support incremental build, and thus needs to be persistent accross build. - * The client enabled the cache by supplying a path to an existing directory. 
- * The code generator will use this to store objects files that may be reused - * during a subsequent build. - * To avoid filling the disk space, a few knobs are provided: - * - The pruning interval limit the frequency at which the garbage collector - * will try to scan the cache directory to prune it from expired entries. - * Setting to -1 disable the pruning (default). - * - The pruning expiration time indicates to the garbage collector how old an - * entry needs to be to be removed. - * - Finally, the garbage collector can be instructed to prune the cache till - * the occupied space goes below a threshold. - * @{ - */ - -/** - * Sets the path to a directory to use as a cache storage for incremental build. - * Setting this activates caching. - * - * \since LTO_API_VERSION=18 - */ -extern void thinlto_codegen_set_cache_dir(thinlto_code_gen_t cg, - const char *cache_dir); - -/** - * Sets the cache pruning interval (in seconds). A negative value disable the - * pruning. An unspecified default value will be applied, and a value of 0 will - * be ignored. - * - * \since LTO_API_VERSION=18 - */ -extern void thinlto_codegen_set_cache_pruning_interval(thinlto_code_gen_t cg, - int interval); - -/** - * Sets the maximum cache size that can be persistent across build, in terms of - * percentage of the available space on the the disk. Set to 100 to indicate - * no limit, 50 to indicate that the cache size will not be left over half the - * available space. A value over 100 will be reduced to 100, a value of 0 will - * be ignored. An unspecified default value will be applied. - * - * The formula looks like: - * AvailableSpace = FreeSpace + ExistingCacheSize - * NewCacheSize = AvailableSpace * P/100 - * - * \since LTO_API_VERSION=18 - */ -extern void thinlto_codegen_set_final_cache_size_relative_to_available_space( - thinlto_code_gen_t cg, unsigned percentage); - -/** - * Sets the expiration (in seconds) for an entry in the cache. 
An unspecified - * default value will be applied. A value of 0 will be ignored. - * - * \since LTO_API_VERSION=18 - */ -extern void thinlto_codegen_set_cache_entry_expiration(thinlto_code_gen_t cg, - unsigned expiration); - -/** - * @} - */ - /** * Sets the path to a directory to use as a storage for temporary bitcode files. * The intention is to make the bitcode files available for debugging at various @@ -820,12 +751,77 @@ extern void thinlto_codegen_add_cross_referenced_symbol(thinlto_code_gen_t cg, const char *name, int length); -#ifdef __cplusplus -} -#endif +/** + * @} // endgoup LLVMCTLTO + * @defgroup LLVMCTLTO_CACHING ThinLTO Cache Control + * @ingroup LLVMCTLTO + * + * These entry points control the ThinLTO cache. The cache is intended to + * support incremental build, and thus needs to be persistent accross build. + * The client enabled the cache by supplying a path to an existing directory. + * The code generator will use this to store objects files that may be reused + * during a subsequent build. + * To avoid filling the disk space, a few knobs are provided: + * - The pruning interval limit the frequency at which the garbage collector + * will try to scan the cache directory to prune it from expired entries. + * Setting to -1 disable the pruning (default). + * - The pruning expiration time indicates to the garbage collector how old an + * entry needs to be to be removed. + * - Finally, the garbage collector can be instructed to prune the cache till + * the occupied space goes below a threshold. + * @{ + */ + +/** + * Sets the path to a directory to use as a cache storage for incremental build. + * Setting this activates caching. + * + * \since LTO_API_VERSION=18 + */ +extern void thinlto_codegen_set_cache_dir(thinlto_code_gen_t cg, + const char *cache_dir); + +/** + * Sets the cache pruning interval (in seconds). A negative value disable the + * pruning. An unspecified default value will be applied, and a value of 0 will + * be ignored. 
+ * + * \since LTO_API_VERSION=18 + */ +extern void thinlto_codegen_set_cache_pruning_interval(thinlto_code_gen_t cg, + int interval); + +/** + * Sets the maximum cache size that can be persistent across build, in terms of + * percentage of the available space on the the disk. Set to 100 to indicate + * no limit, 50 to indicate that the cache size will not be left over half the + * available space. A value over 100 will be reduced to 100, a value of 0 will + * be ignored. An unspecified default value will be applied. + * + * The formula looks like: + * AvailableSpace = FreeSpace + ExistingCacheSize + * NewCacheSize = AvailableSpace * P/100 + * + * \since LTO_API_VERSION=18 + */ +extern void thinlto_codegen_set_final_cache_size_relative_to_available_space( + thinlto_code_gen_t cg, unsigned percentage); /** - * @} + * Sets the expiration (in seconds) for an entry in the cache. An unspecified + * default value will be applied. A value of 0 will be ignored. + * + * \since LTO_API_VERSION=18 */ +extern void thinlto_codegen_set_cache_entry_expiration(thinlto_code_gen_t cg, + unsigned expiration); + +/** + * @} // endgroup LLVMCTLTO_CACHING + */ + +#ifdef __cplusplus +} +#endif #endif /* LLVM_C_LTO_H */ diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h index c9a39ae6b047f93a93e0e33806ccd3e713491fbe..e7e5036e69307b7ceb5d8afdab5662bde70093bf 100644 --- a/include/llvm/ADT/APFloat.h +++ b/include/llvm/ADT/APFloat.h @@ -18,6 +18,7 @@ #define LLVM_ADT_APFLOAT_H #include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/ErrorHandling.h" #include @@ -139,22 +140,25 @@ enum lostFraction { // Example of truncated bits: // implementation classes. This struct should not define any non-static data // members. struct APFloatBase { + // TODO remove this and use APInt typedef directly. + typedef APInt::WordType integerPart; + /// A signed type to represent a floating point numbers unbiased exponent. 
typedef signed short ExponentType; /// \name Floating Point Semantics. /// @{ - static const fltSemantics &IEEEhalf(); - static const fltSemantics &IEEEsingle(); - static const fltSemantics &IEEEdouble(); - static const fltSemantics &IEEEquad(); - static const fltSemantics &PPCDoubleDouble(); - static const fltSemantics &x87DoubleExtended(); + static const fltSemantics &IEEEhalf() LLVM_READNONE; + static const fltSemantics &IEEEsingle() LLVM_READNONE; + static const fltSemantics &IEEEdouble() LLVM_READNONE; + static const fltSemantics &IEEEquad() LLVM_READNONE; + static const fltSemantics &PPCDoubleDouble() LLVM_READNONE; + static const fltSemantics &x87DoubleExtended() LLVM_READNONE; /// A Pseudo fltsemantic used to construct APFloats that cannot conflict with /// anything real. - static const fltSemantics &Bogus(); + static const fltSemantics &Bogus() LLVM_READNONE; /// @} @@ -273,8 +277,8 @@ public: /// @{ opStatus convert(const fltSemantics &, roundingMode, bool *); - opStatus convertToInteger(integerPart *, unsigned int, bool, roundingMode, - bool *) const; + opStatus convertToInteger(MutableArrayRef, unsigned int, bool, + roundingMode, bool *) const; opStatus convertFromAPInt(const APInt &, bool, roundingMode); opStatus convertFromSignExtendedInteger(const integerPart *, unsigned int, bool, roundingMode); @@ -361,7 +365,7 @@ public: /// Returns true if and only if the number has the largest possible finite /// magnitude in the current semantics. bool isLargest() const; - + /// Returns true if and only if the number is an exact integer. 
bool isInteger() const; @@ -495,8 +499,9 @@ private: opStatus addOrSubtract(const IEEEFloat &, roundingMode, bool subtract); opStatus handleOverflow(roundingMode); bool roundAwayFromZero(roundingMode, lostFraction, unsigned int) const; - opStatus convertToSignExtendedInteger(integerPart *, unsigned int, bool, - roundingMode, bool *) const; + opStatus convertToSignExtendedInteger(MutableArrayRef, + unsigned int, bool, roundingMode, + bool *) const; opStatus convertFromUnsignedParts(const integerPart *, unsigned int, roundingMode); opStatus convertFromHexadecimalString(StringRef, roundingMode); @@ -625,8 +630,8 @@ public: opStatus convertFromString(StringRef, roundingMode); opStatus next(bool nextDown); - opStatus convertToInteger(integerPart *Input, unsigned int Width, - bool IsSigned, roundingMode RM, + opStatus convertToInteger(MutableArrayRef Input, + unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const; opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM); opStatus convertFromSignExtendedInteger(const integerPart *Input, @@ -1055,8 +1060,8 @@ public: opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo); - opStatus convertToInteger(integerPart *Input, unsigned int Width, - bool IsSigned, roundingMode RM, + opStatus convertToInteger(MutableArrayRef Input, + unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const { APFLOAT_DISPATCH_ON_SEMANTICS( convertToInteger(Input, Width, IsSigned, RM, IsExact)); diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index 2c0713da256cdc2594cd55d83928a5c256864b2f..045df3c908756aa90498af34fda37c614a1cf71d 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -32,14 +32,6 @@ class raw_ostream; template class SmallVectorImpl; template class ArrayRef; -// An unsigned host type used as a single part of a multi-part -// bignum. 
-typedef uint64_t integerPart; - -const unsigned int host_char_bit = 8; -const unsigned int integerPartWidth = - host_char_bit * static_cast(sizeof(integerPart)); - class APInt; inline APInt operator-(APInt); @@ -75,6 +67,18 @@ inline APInt operator-(APInt); /// uses in its IR. This simplifies its use for LLVM. /// class LLVM_NODISCARD APInt { +public: + typedef uint64_t WordType; + + /// This enum is used to hold the constants we needed for APInt. + enum : unsigned { + /// Byte size of a word. + APINT_WORD_SIZE = sizeof(WordType), + /// Bits in a word. + APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT + }; + +private: unsigned BitWidth; ///< The number of bits in this APInt. /// This union is used to store the integer value. When the @@ -84,15 +88,6 @@ class LLVM_NODISCARD APInt { uint64_t *pVal; ///< Used to store the >64 bits integer value. }; - /// This enum is used to hold the constants we needed for APInt. - enum { - /// Bits in a word - APINT_BITS_PER_WORD = - static_cast(sizeof(uint64_t)) * CHAR_BIT, - /// Byte size of a word - APINT_WORD_SIZE = static_cast(sizeof(uint64_t)) - }; - friend struct DenseMapAPIntKeyInfo; /// \brief Fast internal constructor @@ -147,7 +142,7 @@ class LLVM_NODISCARD APInt { return *this; // Mask out the high bits. 
- uint64_t mask = ~uint64_t(0ULL) >> (APINT_BITS_PER_WORD - wordBits); + uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - wordBits); if (isSingleWord()) VAL &= mask; else @@ -196,32 +191,38 @@ class LLVM_NODISCARD APInt { /// out-of-line slow case for shl APInt shlSlowCase(unsigned shiftAmt) const; - /// out-of-line slow case for operator& - APInt AndSlowCase(const APInt &RHS) const; - - /// out-of-line slow case for operator| - APInt OrSlowCase(const APInt &RHS) const; - - /// out-of-line slow case for operator^ - APInt XorSlowCase(const APInt &RHS) const; - /// out-of-line slow case for operator= APInt &AssignSlowCase(const APInt &RHS); /// out-of-line slow case for operator== - bool EqualSlowCase(const APInt &RHS) const; + bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY; /// out-of-line slow case for operator== - bool EqualSlowCase(uint64_t Val) const; + bool EqualSlowCase(uint64_t Val) const LLVM_READONLY; /// out-of-line slow case for countLeadingZeros - unsigned countLeadingZerosSlowCase() const; + unsigned countLeadingZerosSlowCase() const LLVM_READONLY; /// out-of-line slow case for countTrailingOnes - unsigned countTrailingOnesSlowCase() const; + unsigned countTrailingOnesSlowCase() const LLVM_READONLY; /// out-of-line slow case for countPopulation - unsigned countPopulationSlowCase() const; + unsigned countPopulationSlowCase() const LLVM_READONLY; + + /// out-of-line slow case for setBits. + void setBitsSlowCase(unsigned loBit, unsigned hiBit); + + /// out-of-line slow case for flipAllBits. + void flipAllBitsSlowCase(); + + /// out-of-line slow case for operator&=. + APInt& AndAssignSlowCase(const APInt& RHS); + + /// out-of-line slow case for operator|=. + APInt& OrAssignSlowCase(const APInt& RHS); + + /// out-of-line slow case for operator^=. 
+ APInt& XorAssignSlowCase(const APInt& RHS); public: /// \name Constructors @@ -238,13 +239,14 @@ public: /// \param val the initial value of the APInt /// \param isSigned how to treat signedness of val APInt(unsigned numBits, uint64_t val, bool isSigned = false) - : BitWidth(numBits), VAL(0) { + : BitWidth(numBits) { assert(BitWidth && "bitwidth too small"); - if (isSingleWord()) + if (isSingleWord()) { VAL = val; - else + clearUnusedBits(); + } else { initSlowCase(val, isSigned); - clearUnusedBits(); + } } /// \brief Construct an APInt of numBits width, initialized as bigVal[]. @@ -280,7 +282,7 @@ public: /// Simply makes *this a copy of that. /// @brief Copy Constructor. - APInt(const APInt &that) : BitWidth(that.BitWidth), VAL(0) { + APInt(const APInt &that) : BitWidth(that.BitWidth) { if (isSingleWord()) VAL = that.VAL; else @@ -341,7 +343,7 @@ public: /// This checks to see if the value has all bits of the APInt are set or not. bool isAllOnesValue() const { if (isSingleWord()) - return VAL == ~integerPart(0) >> (APINT_BITS_PER_WORD - BitWidth); + return VAL == UINT64_MAX >> (APINT_BITS_PER_WORD - BitWidth); return countPopulationSlowCase() == BitWidth; } @@ -406,7 +408,7 @@ public: /// If this value is smaller than the specified limit, return it, otherwise /// return the limit value. This causes the value to saturate to the limit. - uint64_t getLimitedValue(uint64_t Limit = ~0ULL) const { + uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) const { return (getActiveBits() > 64 || getZExtValue() > Limit) ? Limit : getZExtValue(); } @@ -418,6 +420,36 @@ public: /// width without remainder. bool isSplat(unsigned SplatSizeInBits) const; + /// \returns true if this APInt value is a sequence of \param numBits ones + /// starting at the least significant bit with the remainder zero. 
+ bool isMask(unsigned numBits) const { + assert(numBits != 0 && "numBits must be non-zero"); + assert(numBits <= BitWidth && "numBits out of range"); + if (isSingleWord()) + return VAL == (UINT64_MAX >> (APINT_BITS_PER_WORD - numBits)); + unsigned Ones = countTrailingOnes(); + return (numBits == Ones) && ((Ones + countLeadingZeros()) == BitWidth); + } + + /// \returns true if this APInt is a non-empty sequence of ones starting at + /// the least significant bit with the remainder zero. + /// Ex. isMask(0x0000FFFFU) == true. + bool isMask() const { + if (isSingleWord()) + return isMask_64(VAL); + unsigned Ones = countTrailingOnes(); + return (Ones > 0) && ((Ones + countLeadingZeros()) == BitWidth); + } + + /// \brief Return true if this APInt value contains a sequence of ones with + /// the remainder zero. + bool isShiftedMask() const { + if (isSingleWord()) + return isShiftedMask_64(VAL); + unsigned Ones = countPopulation(); + return (Ones + countTrailingZeros() + countLeadingZeros()) == BitWidth; + } + /// @} /// \name Value Generators /// @{ @@ -501,12 +533,26 @@ public: /// /// \returns An APInt value with the requested bits set. static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) { - assert(hiBit <= numBits && "hiBit out of range"); - assert(loBit < numBits && "loBit out of range"); - if (hiBit < loBit) - return getLowBitsSet(numBits, hiBit) | - getHighBitsSet(numBits, numBits - loBit); - return getLowBitsSet(numBits, hiBit - loBit).shl(loBit); + APInt Res(numBits, 0); + Res.setBits(loBit, hiBit); + return Res; + } + + /// \brief Get a value with upper bits starting at loBit set. + /// + /// Constructs an APInt value that has a contiguous range of bits set. The + /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other + /// bits will be zero. For example, with parameters(32, 12) you would get + /// 0xFFFFF000. 
+ /// + /// \param numBits the intended bit width of the result + /// \param loBit the index of the lowest bit to set. + /// + /// \returns An APInt value with the requested bits set. + static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) { + APInt Res(numBits, 0); + Res.setBitsFrom(loBit); + return Res; } /// \brief Get a value with high bits set @@ -516,15 +562,9 @@ public: /// \param numBits the bitwidth of the result /// \param hiBitsSet the number of high-order bits set in the result. static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) { - assert(hiBitsSet <= numBits && "Too many bits to set!"); - // Handle a degenerate case, to avoid shifting by word size - if (hiBitsSet == 0) - return APInt(numBits, 0); - unsigned shiftAmt = numBits - hiBitsSet; - // For small values, return quickly - if (numBits <= APINT_BITS_PER_WORD) - return APInt(numBits, ~0ULL << shiftAmt); - return getAllOnesValue(numBits).shl(shiftAmt); + APInt Res(numBits, 0); + Res.setHighBits(hiBitsSet); + return Res; } /// \brief Get a value with low bits set @@ -534,16 +574,9 @@ public: /// \param numBits the bitwidth of the result /// \param loBitsSet the number of low-order bits set in the result. static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) { - assert(loBitsSet <= numBits && "Too many bits to set!"); - // Handle a degenerate case, to avoid shifting by word size - if (loBitsSet == 0) - return APInt(numBits, 0); - if (loBitsSet == APINT_BITS_PER_WORD) - return APInt(numBits, UINT64_MAX); - // For small values, return quickly. - if (loBitsSet <= APINT_BITS_PER_WORD) - return APInt(numBits, UINT64_MAX >> (APINT_BITS_PER_WORD - loBitsSet)); - return getAllOnesValue(numBits).lshr(numBits - loBitsSet); + APInt Res(numBits, 0); + Res.setLowBits(loBitsSet); + return Res; } /// \brief Return a value containing V broadcasted over NewLen bits. @@ -613,30 +646,13 @@ public: /// \returns *this decremented by one. 
APInt &operator--(); - /// \brief Unary bitwise complement operator. - /// - /// Performs a bitwise complement operation on this APInt. - /// - /// \returns an APInt that is the bitwise complement of *this - APInt operator~() const { - APInt Result(*this); - Result.flipAllBits(); - return Result; - } - /// \brief Logical negation operator. /// /// Performs logical negation operation on this APInt. /// /// \returns true if *this is zero, false otherwise. bool operator!() const { - if (isSingleWord()) - return !VAL; - - for (unsigned i = 0; i != getNumWords(); ++i) - if (pVal[i]) - return false; - return true; + return *this == 0; } /// @} @@ -688,7 +704,16 @@ public: /// than 64, the value is zero filled in the unspecified high order bits. /// /// \returns *this after assignment of RHS value. - APInt &operator=(uint64_t RHS); + APInt &operator=(uint64_t RHS) { + if (isSingleWord()) { + VAL = RHS; + clearUnusedBits(); + } else { + pVal[0] = RHS; + memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); + } + return *this; + } /// \brief Bitwise AND assignment operator. /// @@ -696,7 +721,29 @@ public: /// assigned to *this. /// /// \returns *this after ANDing with RHS. - APInt &operator&=(const APInt &RHS); + APInt &operator&=(const APInt &RHS) { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) { + VAL &= RHS.VAL; + return *this; + } + return AndAssignSlowCase(RHS); + } + + /// \brief Bitwise AND assignment operator. + /// + /// Performs a bitwise AND operation on this APInt and RHS. RHS is + /// logically zero-extended or truncated to match the bit-width of + /// the LHS. + APInt &operator&=(uint64_t RHS) { + if (isSingleWord()) { + VAL &= RHS; + return *this; + } + pVal[0] &= RHS; + memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); + return *this; + } /// \brief Bitwise OR assignment operator. /// @@ -704,7 +751,14 @@ public: /// assigned *this; /// /// \returns *this after ORing with RHS. 
- APInt &operator|=(const APInt &RHS); + APInt &operator|=(const APInt &RHS) { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) { + VAL |= RHS.VAL; + return *this; + } + return OrAssignSlowCase(RHS); + } /// \brief Bitwise OR assignment operator. /// @@ -727,7 +781,29 @@ public: /// assigned to *this. /// /// \returns *this after XORing with RHS. - APInt &operator^=(const APInt &RHS); + APInt &operator^=(const APInt &RHS) { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) { + VAL ^= RHS.VAL; + return *this; + } + return XorAssignSlowCase(RHS); + } + + /// \brief Bitwise XOR assignment operator. + /// + /// Performs a bitwise XOR operation on this APInt and RHS. RHS is + /// logically zero-extended or truncated to match the bit-width of + /// the LHS. + APInt &operator^=(uint64_t RHS) { + if (isSingleWord()) { + VAL ^= RHS; + clearUnusedBits(); + } else { + pVal[0] ^= RHS; + } + return *this; + } /// \brief Multiplication assignment operator. /// @@ -766,59 +842,6 @@ public: /// \name Binary Operators /// @{ - /// \brief Bitwise AND operator. - /// - /// Performs a bitwise AND operation on *this and RHS. - /// - /// \returns An APInt value representing the bitwise AND of *this and RHS. - APInt operator&(const APInt &RHS) const { - assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); - if (isSingleWord()) - return APInt(getBitWidth(), VAL & RHS.VAL); - return AndSlowCase(RHS); - } - APInt And(const APInt &RHS) const { return this->operator&(RHS); } - - /// \brief Bitwise OR operator. - /// - /// Performs a bitwise OR operation on *this and RHS. - /// - /// \returns An APInt value representing the bitwise OR of *this and RHS. - APInt operator|(const APInt &RHS) const { - assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); - if (isSingleWord()) - return APInt(getBitWidth(), VAL | RHS.VAL); - return OrSlowCase(RHS); - } - - /// \brief Bitwise OR function. 
- /// - /// Performs a bitwise or on *this and RHS. This is implemented by simply - /// calling operator|. - /// - /// \returns An APInt value representing the bitwise OR of *this and RHS. - APInt Or(const APInt &RHS) const { return this->operator|(RHS); } - - /// \brief Bitwise XOR operator. - /// - /// Performs a bitwise XOR operation on *this and RHS. - /// - /// \returns An APInt value representing the bitwise XOR of *this and RHS. - APInt operator^(const APInt &RHS) const { - assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); - if (isSingleWord()) - return APInt(BitWidth, VAL ^ RHS.VAL); - return XorSlowCase(RHS); - } - - /// \brief Bitwise XOR function. - /// - /// Performs a bitwise XOR operation on *this and RHS. This is implemented - /// through the usage of operator^. - /// - /// \returns An APInt value representing the bitwise XOR of *this and RHS. - APInt Xor(const APInt &RHS) const { return this->operator^(RHS); } - /// \brief Multiplication operator. /// /// Multiplies this APInt by RHS and returns the result. @@ -1012,7 +1035,7 @@ public: /// the validity of the less-than relationship. /// /// \returns true if *this < RHS when both are considered unsigned. - bool ult(const APInt &RHS) const; + bool ult(const APInt &RHS) const LLVM_READONLY; /// \brief Unsigned less than comparison /// @@ -1030,7 +1053,7 @@ public: /// validity of the less-than relationship. /// /// \returns true if *this < RHS when both are considered signed. - bool slt(const APInt &RHS) const; + bool slt(const APInt &RHS) const LLVM_READONLY; /// \brief Signed less than comparison /// @@ -1144,7 +1167,11 @@ public: /// This operation tests if there are any pairs of corresponding bits /// between this APInt and RHS that are both set. 
- bool intersects(const APInt &RHS) const { return (*this & RHS) != 0; } + bool intersects(const APInt &RHS) const { + APInt temp(*this); + temp &= RHS; + return temp != 0; + } /// @} /// \name Resizing Operators @@ -1203,11 +1230,9 @@ public: void setAllBits() { if (isSingleWord()) VAL = UINT64_MAX; - else { + else // Set all the bits in all the words. - for (unsigned i = 0; i < getNumWords(); ++i) - pVal[i] = UINT64_MAX; - } + memset(pVal, -1, getNumWords() * APINT_WORD_SIZE); // Clear the unused ones clearUnusedBits(); } @@ -1217,6 +1242,49 @@ public: /// Set the given bit to 1 whose position is given as "bitPosition". void setBit(unsigned bitPosition); + /// Set the sign bit to 1. + void setSignBit() { + setBit(BitWidth - 1); + } + + /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1. + void setBits(unsigned loBit, unsigned hiBit) { + assert(hiBit <= BitWidth && "hiBit out of range"); + assert(loBit <= BitWidth && "loBit out of range"); + if (loBit == hiBit) + return; + if (loBit > hiBit) { + setLowBits(hiBit); + setHighBits(BitWidth - loBit); + return; + } + if (loBit < APINT_BITS_PER_WORD && hiBit <= APINT_BITS_PER_WORD) { + uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit)); + mask <<= loBit; + if (isSingleWord()) + VAL |= mask; + else + pVal[0] |= mask; + } else { + setBitsSlowCase(loBit, hiBit); + } + } + + /// Set the top bits starting from loBit. + void setBitsFrom(unsigned loBit) { + return setBits(loBit, BitWidth); + } + + /// Set the bottom loBits bits. + void setLowBits(unsigned loBits) { + return setBits(0, loBits); + } + + /// Set the top hiBits bits. + void setHighBits(unsigned hiBits) { + return setBits(BitWidth - hiBits, BitWidth); + } + /// \brief Set every bit to 0. void clearAllBits() { if (isSingleWord()) @@ -1232,13 +1300,12 @@ public: /// \brief Toggle every bit to its opposite value. 
void flipAllBits() { - if (isSingleWord()) + if (isSingleWord()) { VAL ^= UINT64_MAX; - else { - for (unsigned i = 0; i < getNumWords(); ++i) - pVal[i] ^= UINT64_MAX; + clearUnusedBits(); + } else { + flipAllBitsSlowCase(); } - clearUnusedBits(); } /// \brief Toggles a given bit to its opposite value. @@ -1247,6 +1314,12 @@ public: /// as "bitPosition". void flipBit(unsigned bitPosition); + /// Insert the bits from a smaller APInt starting at bitPosition. + void insertBits(const APInt &SubBits, unsigned bitPosition); + + /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits). + APInt extractBits(unsigned numBits, unsigned bitPosition) const; + /// @} /// \name Value Characterization Functions /// @{ @@ -1356,7 +1429,7 @@ public: /// /// \returns 0 if the high order bit is not set, otherwise returns the number /// of 1 bits from the most significant to the least - unsigned countLeadingOnes() const; + unsigned countLeadingOnes() const LLVM_READONLY; /// Computes the number of leading bits of this APInt that are equal to its /// sign bit. @@ -1372,7 +1445,7 @@ public: /// /// \returns BitWidth if the value is zero, otherwise returns the number of /// zeros from the least significant bit to the first one bit. - unsigned countTrailingZeros() const; + unsigned countTrailingZeros() const LLVM_READONLY; /// \brief Count the number of trailing one bits. /// @@ -1589,46 +1662,46 @@ public: /// Sets the least significant part of a bignum to the input value, and zeroes /// out higher parts. - static void tcSet(integerPart *, integerPart, unsigned int); + static void tcSet(WordType *, WordType, unsigned); /// Assign one bignum to another. - static void tcAssign(integerPart *, const integerPart *, unsigned int); + static void tcAssign(WordType *, const WordType *, unsigned); /// Returns true if a bignum is zero, false otherwise. 
- static bool tcIsZero(const integerPart *, unsigned int); + static bool tcIsZero(const WordType *, unsigned); /// Extract the given bit of a bignum; returns 0 or 1. Zero-based. - static int tcExtractBit(const integerPart *, unsigned int bit); + static int tcExtractBit(const WordType *, unsigned bit); /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least /// significant bit of DST. All high bits above srcBITS in DST are /// zero-filled. - static void tcExtract(integerPart *, unsigned int dstCount, - const integerPart *, unsigned int srcBits, - unsigned int srcLSB); + static void tcExtract(WordType *, unsigned dstCount, + const WordType *, unsigned srcBits, + unsigned srcLSB); /// Set the given bit of a bignum. Zero-based. - static void tcSetBit(integerPart *, unsigned int bit); + static void tcSetBit(WordType *, unsigned bit); /// Clear the given bit of a bignum. Zero-based. - static void tcClearBit(integerPart *, unsigned int bit); + static void tcClearBit(WordType *, unsigned bit); /// Returns the bit number of the least or most significant set bit of a /// number. If the input number has no bits set -1U is returned. - static unsigned int tcLSB(const integerPart *, unsigned int); - static unsigned int tcMSB(const integerPart *parts, unsigned int n); + static unsigned tcLSB(const WordType *, unsigned n); + static unsigned tcMSB(const WordType *parts, unsigned n); /// Negate a bignum in-place. - static void tcNegate(integerPart *, unsigned int); + static void tcNegate(WordType *, unsigned); /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag. - static integerPart tcAdd(integerPart *, const integerPart *, - integerPart carry, unsigned); + static WordType tcAdd(WordType *, const WordType *, + WordType carry, unsigned); /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag. 
- static integerPart tcSubtract(integerPart *, const integerPart *, - integerPart carry, unsigned); + static WordType tcSubtract(WordType *, const WordType *, + WordType carry, unsigned); /// DST += SRC * MULTIPLIER + PART if add is true /// DST = SRC * MULTIPLIER + PART if add is false @@ -1640,23 +1713,23 @@ public: /// Otherwise DST is filled with the least significant DSTPARTS parts of the /// result, and if all of the omitted higher parts were zero return zero, /// otherwise overflow occurred and return one. - static int tcMultiplyPart(integerPart *dst, const integerPart *src, - integerPart multiplier, integerPart carry, - unsigned int srcParts, unsigned int dstParts, + static int tcMultiplyPart(WordType *dst, const WordType *src, + WordType multiplier, WordType carry, + unsigned srcParts, unsigned dstParts, bool add); /// DST = LHS * RHS, where DST has the same width as the operands and is /// filled with the least significant parts of the result. Returns one if /// overflow occurred, otherwise zero. DST must be disjoint from both /// operands. - static int tcMultiply(integerPart *, const integerPart *, const integerPart *, + static int tcMultiply(WordType *, const WordType *, const WordType *, unsigned); /// DST = LHS * RHS, where DST has width the sum of the widths of the /// operands. No overflow occurs. DST must be disjoint from both /// operands. Returns the number of parts required to hold the result. - static unsigned int tcFullMultiply(integerPart *, const integerPart *, - const integerPart *, unsigned, unsigned); + static unsigned tcFullMultiply(WordType *, const WordType *, + const WordType *, unsigned, unsigned); /// If RHS is zero LHS and REMAINDER are left unchanged, return one. /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set @@ -1667,38 +1740,35 @@ public: /// SCRATCH is a bignum of the same size as the operands and result for use by /// the routine; its contents need not be initialized and are destroyed. 
LHS, /// REMAINDER and SCRATCH must be distinct. - static int tcDivide(integerPart *lhs, const integerPart *rhs, - integerPart *remainder, integerPart *scratch, - unsigned int parts); + static int tcDivide(WordType *lhs, const WordType *rhs, + WordType *remainder, WordType *scratch, + unsigned parts); /// Shift a bignum left COUNT bits. Shifted in bits are zero. There are no /// restrictions on COUNT. - static void tcShiftLeft(integerPart *, unsigned int parts, - unsigned int count); + static void tcShiftLeft(WordType *, unsigned parts, unsigned count); /// Shift a bignum right COUNT bits. Shifted in bits are zero. There are no /// restrictions on COUNT. - static void tcShiftRight(integerPart *, unsigned int parts, - unsigned int count); + static void tcShiftRight(WordType *, unsigned parts, unsigned count); /// The obvious AND, OR and XOR and complement operations. - static void tcAnd(integerPart *, const integerPart *, unsigned int); - static void tcOr(integerPart *, const integerPart *, unsigned int); - static void tcXor(integerPart *, const integerPart *, unsigned int); - static void tcComplement(integerPart *, unsigned int); + static void tcAnd(WordType *, const WordType *, unsigned); + static void tcOr(WordType *, const WordType *, unsigned); + static void tcXor(WordType *, const WordType *, unsigned); + static void tcComplement(WordType *, unsigned); /// Comparison (unsigned) of two bignums. - static int tcCompare(const integerPart *, const integerPart *, unsigned int); + static int tcCompare(const WordType *, const WordType *, unsigned); /// Increment a bignum in-place. Return the carry flag. - static integerPart tcIncrement(integerPart *, unsigned int); + static WordType tcIncrement(WordType *, unsigned); /// Decrement a bignum in-place. Return the borrow flag. - static integerPart tcDecrement(integerPart *, unsigned int); + static WordType tcDecrement(WordType *, unsigned); /// Set the least significant BITS and clear the rest. 
- static void tcSetLeastSignificantBits(integerPart *, unsigned int, - unsigned int bits); + static void tcSetLeastSignificantBits(WordType *, unsigned, unsigned bits); /// \brief debug method void dump() const; @@ -1723,6 +1793,74 @@ inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; } inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; } +/// \brief Unary bitwise complement operator. +/// +/// \returns an APInt that is the bitwise complement of \p v. +inline APInt operator~(APInt v) { + v.flipAllBits(); + return v; +} + +inline APInt operator&(APInt a, const APInt &b) { + a &= b; + return a; +} + +inline APInt operator&(const APInt &a, APInt &&b) { + b &= a; + return std::move(b); +} + +inline APInt operator&(APInt a, uint64_t RHS) { + a &= RHS; + return a; +} + +inline APInt operator&(uint64_t LHS, APInt b) { + b &= LHS; + return b; +} + +inline APInt operator|(APInt a, const APInt &b) { + a |= b; + return a; +} + +inline APInt operator|(const APInt &a, APInt &&b) { + b |= a; + return std::move(b); +} + +inline APInt operator|(APInt a, uint64_t RHS) { + a |= RHS; + return a; +} + +inline APInt operator|(uint64_t LHS, APInt b) { + b |= LHS; + return b; +} + +inline APInt operator^(APInt a, const APInt &b) { + a ^= b; + return a; +} + +inline APInt operator^(const APInt &a, APInt &&b) { + b ^= a; + return std::move(b); +} + +inline APInt operator^(APInt a, uint64_t RHS) { + a ^= RHS; + return a; +} + +inline APInt operator^(uint64_t LHS, APInt b) { + b ^= LHS; + return b; +} + inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) { I.print(OS, true); return OS; @@ -1799,47 +1937,13 @@ inline const APInt &umax(const APInt &A, const APInt &B) { return A.ugt(B) ? A : B; } -/// \brief Check if the specified APInt has a N-bits unsigned integer value. -inline bool isIntN(unsigned N, const APInt &APIVal) { return APIVal.isIntN(N); } - -/// \brief Check if the specified APInt has a N-bits signed integer value. 
-inline bool isSignedIntN(unsigned N, const APInt &APIVal) { - return APIVal.isSignedIntN(N); -} - -/// \returns true if the argument APInt value is a sequence of ones starting at -/// the least significant bit with the remainder zero. -inline bool isMask(unsigned numBits, const APInt &APIVal) { - return numBits <= APIVal.getBitWidth() && - APIVal == APInt::getLowBitsSet(APIVal.getBitWidth(), numBits); -} - -/// \returns true if the argument is a non-empty sequence of ones starting at -/// the least significant bit with the remainder zero (32 bit version). -/// Ex. isMask(0x0000FFFFU) == true. -inline bool isMask(const APInt &Value) { - return (Value != 0) && ((Value + 1) & Value) == 0; -} - -/// \brief Return true if the argument APInt value contains a sequence of ones -/// with the remainder zero. -inline bool isShiftedMask(unsigned numBits, const APInt &APIVal) { - return isMask(numBits, (APIVal - APInt(numBits, 1)) | APIVal); -} - -/// \brief Returns a byte-swapped representation of the specified APInt Value. -inline APInt byteSwap(const APInt &APIVal) { return APIVal.byteSwap(); } - -/// \brief Returns the floor log base 2 of the specified APInt value. -inline unsigned logBase2(const APInt &APIVal) { return APIVal.logBase2(); } - /// \brief Compute GCD of two APInt values. /// /// This function returns the greatest common divisor of the two APInt values /// using Euclid's algorithm. /// -/// \returns the greatest common divisor of Val1 and Val2 -APInt GreatestCommonDivisor(const APInt &Val1, const APInt &Val2); +/// \returns the greatest common divisor of A and B. +APInt GreatestCommonDivisor(APInt A, APInt B); /// \brief Converts the given APInt to a double value. /// @@ -1879,83 +1983,6 @@ inline APInt RoundFloatToAPInt(float Float, unsigned width) { return RoundDoubleToAPInt(double(Float), width); } -/// \brief Arithmetic right-shift function. -/// -/// Arithmetic right-shift the APInt by shiftAmt. 
-inline APInt ashr(const APInt &LHS, unsigned shiftAmt) { - return LHS.ashr(shiftAmt); -} - -/// \brief Logical right-shift function. -/// -/// Logical right-shift the APInt by shiftAmt. -inline APInt lshr(const APInt &LHS, unsigned shiftAmt) { - return LHS.lshr(shiftAmt); -} - -/// \brief Left-shift function. -/// -/// Left-shift the APInt by shiftAmt. -inline APInt shl(const APInt &LHS, unsigned shiftAmt) { - return LHS.shl(shiftAmt); -} - -/// \brief Signed division function for APInt. -/// -/// Signed divide APInt LHS by APInt RHS. -inline APInt sdiv(const APInt &LHS, const APInt &RHS) { return LHS.sdiv(RHS); } - -/// \brief Unsigned division function for APInt. -/// -/// Unsigned divide APInt LHS by APInt RHS. -inline APInt udiv(const APInt &LHS, const APInt &RHS) { return LHS.udiv(RHS); } - -/// \brief Function for signed remainder operation. -/// -/// Signed remainder operation on APInt. -inline APInt srem(const APInt &LHS, const APInt &RHS) { return LHS.srem(RHS); } - -/// \brief Function for unsigned remainder operation. -/// -/// Unsigned remainder operation on APInt. -inline APInt urem(const APInt &LHS, const APInt &RHS) { return LHS.urem(RHS); } - -/// \brief Function for multiplication operation. -/// -/// Performs multiplication on APInt values. -inline APInt mul(const APInt &LHS, const APInt &RHS) { return LHS * RHS; } - -/// \brief Function for addition operation. -/// -/// Performs addition on APInt values. -inline APInt add(const APInt &LHS, const APInt &RHS) { return LHS + RHS; } - -/// \brief Function for subtraction operation. -/// -/// Performs subtraction on APInt values. -inline APInt sub(const APInt &LHS, const APInt &RHS) { return LHS - RHS; } - -/// \brief Bitwise AND function for APInt. -/// -/// Performs bitwise AND operation on APInt LHS and -/// APInt RHS. -inline APInt And(const APInt &LHS, const APInt &RHS) { return LHS & RHS; } - -/// \brief Bitwise OR function for APInt. 
-/// -/// Performs bitwise OR operation on APInt LHS and APInt RHS. -inline APInt Or(const APInt &LHS, const APInt &RHS) { return LHS | RHS; } - -/// \brief Bitwise XOR function for APInt. -/// -/// Performs bitwise XOR operation on APInt. -inline APInt Xor(const APInt &LHS, const APInt &RHS) { return LHS ^ RHS; } - -/// \brief Bitwise complement function. -/// -/// Performs a bitwise complement operation on APInt. -inline APInt Not(const APInt &APIVal) { return ~APIVal; } - } // End of APIntOps namespace // See friend declaration above. This additional declaration is required in diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h index b3fe31f4a806dcdc05aa58606fd2485112c5910c..6b35d0aec8b2b2f90756ac5177bd464843515870 100644 --- a/include/llvm/ADT/ArrayRef.h +++ b/include/llvm/ADT/ArrayRef.h @@ -487,6 +487,18 @@ namespace llvm { return ArrayRef(Arr); } + /// Construct a MutableArrayRef from a single element. + template + MutableArrayRef makeMutableArrayRef(T &OneElt) { + return OneElt; + } + + /// Construct a MutableArrayRef from a pointer and length. + template + MutableArrayRef makeMutableArrayRef(T *data, size_t length) { + return MutableArrayRef(data, length); + } + /// @} /// @name ArrayRef Comparison Operators /// @{ diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h index cb318199ec778b852c4eb27a62e93a67cd66923d..8240d01ae977c7dc75179f997cf5eadb954ca590 100644 --- a/include/llvm/ADT/BitVector.h +++ b/include/llvm/ADT/BitVector.h @@ -161,6 +161,17 @@ public: return -1; } + /// find_first_unset - Returns the index of the first unset bit, -1 if all + /// of the bits are set. + int find_first_unset() const { + for (unsigned i = 0; i < NumBitWords(size()); ++i) + if (Bits[i] != ~0UL) { + unsigned Result = i * BITWORD_SIZE + countTrailingOnes(Bits[i]); + return Result < size() ? Result : -1; + } + return -1; + } + /// find_next - Returns the index of the next set bit following the /// "Prev" bit. 
Returns -1 if the next set bit is not found. int find_next(unsigned Prev) const { @@ -184,6 +195,30 @@ public: return -1; } + /// find_next_unset - Returns the index of the next unset bit following the + /// "Prev" bit. Returns -1 if all remaining bits are set. + int find_next_unset(unsigned Prev) const { + ++Prev; + if (Prev >= Size) + return -1; + + unsigned WordPos = Prev / BITWORD_SIZE; + unsigned BitPos = Prev % BITWORD_SIZE; + BitWord Copy = Bits[WordPos]; + // Mask in previous bits. Shift a BitWord, not an int, to avoid overflow. + BitWord Mask = (BitWord(1) << BitPos) - 1; + Copy |= Mask; + + if (Copy != ~0UL) + return next_unset_in_word(WordPos, Copy); + + // Check subsequent words. + for (unsigned i = WordPos + 1; i < NumBitWords(size()); ++i) + if (Bits[i] != ~0UL) + return next_unset_in_word(i, Bits[i]); + return -1; + } + /// clear - Clear all bits. void clear() { Size = 0; @@ -503,6 +538,11 @@ public: } private: + int next_unset_in_word(int WordIndex, BitWord Word) const { + unsigned Result = WordIndex * BITWORD_SIZE + countTrailingOnes(Word); + return Result < size() ? Result : -1; + } + unsigned NumBitWords(unsigned S) const { return (S + BITWORD_SIZE-1) / BITWORD_SIZE; } diff --git a/include/llvm/ADT/BreadthFirstIterator.h b/include/llvm/ADT/BreadthFirstIterator.h new file mode 100644 index 0000000000000000000000000000000000000000..eaeecb6e057ffecf0e506193a205a9e3e51f6c56 --- /dev/null +++ b/include/llvm/ADT/BreadthFirstIterator.h @@ -0,0 +1,164 @@ +//===- llvm/ADT/BreadthFirstIterator.h - Breadth First iterator -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file builds on the ADT/GraphTraits.h file to build a generic breadth +// first graph iterator. 
This file exposes the following functions/types: +// +// bf_begin/bf_end/bf_iterator +// * Normal breadth-first iteration - visit a graph level-by-level. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_BREADTHFIRSTITERATOR_H +#define LLVM_ADT_BREADTHFIRSTITERATOR_H + +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/iterator_range.h" +#include +#include +#include +#include + +namespace llvm { + +// bf_iterator_storage - A private class which is used to figure out where to +// store the visited set. We only provide a non-external variant for now. +template class bf_iterator_storage { +public: + SetType Visited; +}; + +// The visited state for the iteration is a simple set. +template +using bf_iterator_default_set = SmallPtrSet; + +// Generic Breadth first search iterator. +template ::NodeRef>, + class GT = GraphTraits> +class bf_iterator + : public std::iterator, + public bf_iterator_storage { + typedef std::iterator super; + + typedef typename GT::NodeRef NodeRef; + typedef typename GT::ChildIteratorType ChildItTy; + + // First element is the node reference, second is the next child to visit. + typedef std::pair> QueueElement; + + // Visit queue - used to maintain BFS ordering. + // Optional<> because we need markers for levels. + std::queue> VisitQueue; + + // Current level. + unsigned Level; + +private: + inline bf_iterator(NodeRef Node) { + this->Visited.insert(Node); + Level = 0; + + // Also, insert a dummy node as marker. 
+ VisitQueue.push(QueueElement(Node, None)); + VisitQueue.push(None); + } + + inline bf_iterator() : Level(0) {} + + inline void toNext() { + Optional Head = VisitQueue.front(); + QueueElement H = Head.getValue(); + NodeRef Node = H.first; + Optional &ChildIt = H.second; + + if (!ChildIt) + ChildIt.emplace(GT::child_begin(Node)); + while (*ChildIt != GT::child_end(Node)) { + NodeRef Next = *(*ChildIt)++; + + // Already visited? + if (this->Visited.insert(Next).second) + VisitQueue.push(QueueElement(Next, None)); + } + VisitQueue.pop(); + + // Go to the next element skipping markers if needed. + if (!VisitQueue.empty()) { + Head = VisitQueue.front(); + if (Head != None) + return; + Level += 1; + VisitQueue.pop(); + + // Don't push another marker if this is the last + // element. + if (!VisitQueue.empty()) + VisitQueue.push(None); + } + } + +public: + typedef typename super::pointer pointer; + + // Provide static begin and end methods as our public "constructors" + static bf_iterator begin(const GraphT &G) { + return bf_iterator(GT::getEntryNode(G)); + } + + static bf_iterator end(const GraphT &G) { return bf_iterator(); } + + bool operator==(const bf_iterator &RHS) const { + return VisitQueue == RHS.VisitQueue; + } + + bool operator!=(const bf_iterator &RHS) const { return !(*this == RHS); } + + const NodeRef &operator*() const { return VisitQueue.front()->first; } + + // This is a nonstandard operator-> that dereferences the pointer an extra + // time so that you can actually call methods on the node, because the + // contained type is a pointer. + NodeRef operator->() const { return **this; } + + bf_iterator &operator++() { // Pre-increment + toNext(); + return *this; + } + + bf_iterator operator++(int) { // Post-increment + bf_iterator ItCopy = *this; + ++*this; + return ItCopy; + } + + unsigned getLevel() const { return Level; } +}; + +// Provide global constructors that automatically figure out correct types. 
+template bf_iterator bf_begin(const T &G) { + return bf_iterator::begin(G); +} + +template bf_iterator bf_end(const T &G) { + return bf_iterator::end(G); +} + +// Provide an accessor method to use them in range-based patterns. +template iterator_range> breadth_first(const T &G) { + return make_range(bf_begin(G), bf_end(G)); +} + +} // end namespace llvm + +#endif // LLVM_ADT_BREADTHFIRSTITERATOR_H diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h index 0b4b09d4b7330b1bb174eca1e0034743d2f9a79f..fd8d3bf368a8866f52ceaa41da6e98b51b94a3b9 100644 --- a/include/llvm/ADT/DenseMap.h +++ b/include/llvm/ADT/DenseMap.h @@ -53,6 +53,9 @@ class DenseMapIterator; template class DenseMapBase : public DebugEpochBase { + template + using const_arg_type_t = typename const_pointer_or_const_ref::type; + public: typedef unsigned size_type; typedef KeyT key_type; @@ -119,18 +122,18 @@ public: } /// Return 1 if the specified key is in the map, 0 otherwise. - size_type count(const KeyT &Val) const { + size_type count(const_arg_type_t Val) const { const BucketT *TheBucket; return LookupBucketFor(Val, TheBucket) ? 1 : 0; } - iterator find(const KeyT &Val) { + iterator find(const_arg_type_t Val) { BucketT *TheBucket; if (LookupBucketFor(Val, TheBucket)) return iterator(TheBucket, getBucketsEnd(), *this, true); return end(); } - const_iterator find(const KeyT &Val) const { + const_iterator find(const_arg_type_t Val) const { const BucketT *TheBucket; if (LookupBucketFor(Val, TheBucket)) return const_iterator(TheBucket, getBucketsEnd(), *this, true); @@ -159,7 +162,7 @@ public: /// lookup - Return the entry for the specified key, or a default /// constructed value if no such entry exists. 
- ValueT lookup(const KeyT &Val) const { + ValueT lookup(const_arg_type_t Val) const { const BucketT *TheBucket; if (LookupBucketFor(Val, TheBucket)) return TheBucket->getSecond(); @@ -389,6 +392,8 @@ protected: return KeyInfoT::getHashValue(Val); } static const KeyT getEmptyKey() { + static_assert(std::is_base_of::value, + "Must pass the derived type to this template!"); return KeyInfoT::getEmptyKey(); } static const KeyT getTombstoneKey() { diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h index a844ebcccf5b89306186097295bb4d9b5f4b86a4..bb973ac65063428ed5ea8e7e2059a413961b885c 100644 --- a/include/llvm/ADT/DenseMapInfo.h +++ b/include/llvm/ADT/DenseMapInfo.h @@ -60,6 +60,16 @@ template<> struct DenseMapInfo { } }; +// Provide DenseMapInfo for unsigned shorts. +template <> struct DenseMapInfo { + static inline unsigned short getEmptyKey() { return 0xFFFF; } + static inline unsigned short getTombstoneKey() { return 0xFFFF - 1; } + static unsigned getHashValue(const unsigned short &Val) { return Val * 37U; } + static bool isEqual(const unsigned short &LHS, const unsigned short &RHS) { + return LHS == RHS; + } +}; + // Provide DenseMapInfo for unsigned ints. template<> struct DenseMapInfo { static inline unsigned getEmptyKey() { return ~0U; } @@ -95,6 +105,14 @@ template<> struct DenseMapInfo { } }; +// Provide DenseMapInfo for shorts. +template <> struct DenseMapInfo { + static inline short getEmptyKey() { return 0x7FFF; } + static inline short getTombstoneKey() { return -0x7FFF - 1; } + static unsigned getHashValue(const short &Val) { return Val * 37U; } + static bool isEqual(const short &LHS, const short &RHS) { return LHS == RHS; } +}; + // Provide DenseMapInfo for ints. 
template<> struct DenseMapInfo { static inline int getEmptyKey() { return 0x7fffffff; } diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h index b1345f7da7388deed693d74c9e48703fcb13d4f1..fcf304c3ecc41060846c2f327515a04536a25917 100644 --- a/include/llvm/ADT/DenseSet.h +++ b/include/llvm/ADT/DenseSet.h @@ -48,6 +48,8 @@ class DenseSetImpl { static_assert(sizeof(typename MapTy::value_type) == sizeof(ValueT), "DenseMap buckets unexpectedly large!"); MapTy TheMap; + template + using const_arg_type_t = typename const_pointer_or_const_ref::type; public: typedef ValueT key_type; @@ -78,7 +80,7 @@ public: } /// Return 1 if the specified key is in the set, 0 otherwise. - size_type count(const ValueT &V) const { + size_type count(const_arg_type_t V) const { return TheMap.count(V); } @@ -154,8 +156,8 @@ public: const_iterator begin() const { return ConstIterator(TheMap.begin()); } const_iterator end() const { return ConstIterator(TheMap.end()); } - iterator find(const ValueT &V) { return Iterator(TheMap.find(V)); } - const_iterator find(const ValueT &V) const { + iterator find(const_arg_type_t V) { return Iterator(TheMap.find(V)); } + const_iterator find(const_arg_type_t V) const { return ConstIterator(TheMap.find(V)); } diff --git a/include/llvm/ADT/DepthFirstIterator.h b/include/llvm/ADT/DepthFirstIterator.h index c54573204588ea3ce99c9f5f1a846a37e783e7c9..b020d48cb3f082d4366da13498ea8a84fcb80baa 100644 --- a/include/llvm/ADT/DepthFirstIterator.h +++ b/include/llvm/ADT/DepthFirstIterator.h @@ -135,7 +135,7 @@ private: } } this->Visited.completed(Node); - + // Oops, ran out of successors... go up a level on the stack. 
VisitStack.pop_back(); } while (!VisitStack.empty()); diff --git a/include/llvm/ADT/GraphTraits.h b/include/llvm/ADT/GraphTraits.h index 29bbcb010eeef3266fbff411d9acc2948cb09f2a..2c88c4271b4895d6d305c25f5f430fb984ab7f79 100644 --- a/include/llvm/ADT/GraphTraits.h +++ b/include/llvm/ADT/GraphTraits.h @@ -18,6 +18,8 @@ #ifndef LLVM_ADT_GRAPHTRAITS_H #define LLVM_ADT_GRAPHTRAITS_H +#include "llvm/ADT/iterator_range.h" + namespace llvm { // GraphTraits - This class should be specialized by different graph types... @@ -86,6 +88,33 @@ struct Inverse { // inverse falls back to the original graph. template struct GraphTraits>> : GraphTraits {}; +// Provide iterator ranges for the graph traits nodes and children +template +iterator_range::nodes_iterator> +nodes(const GraphType &G) { + return make_range(GraphTraits::nodes_begin(G), + GraphTraits::nodes_end(G)); +} +template +iterator_range>::nodes_iterator> +inverse_nodes(const GraphType &G) { + return make_range(GraphTraits>::nodes_begin(G), + GraphTraits>::nodes_end(G)); +} + +template +iterator_range::ChildIteratorType> +children(const typename GraphTraits::NodeRef &G) { + return make_range(GraphTraits::child_begin(G), + GraphTraits::child_end(G)); +} + +template +iterator_range>::ChildIteratorType> +inverse_children(const typename GraphTraits::NodeRef &G) { + return make_range(GraphTraits>::child_begin(G), + GraphTraits>::child_end(G)); +} } // End llvm namespace #endif diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h index a8ac18645f3ab75db17d6d219f829ca982597535..9eb15524c0f3059918d08e37e5596e3ea9e53f31 100644 --- a/include/llvm/ADT/PointerUnion.h +++ b/include/llvm/ADT/PointerUnion.h @@ -31,7 +31,7 @@ template struct PointerUnionTypeSelectorReturn { /// Get a type based on whether two types are the same or not. 
/// /// For: -/// +/// /// \code /// typedef typename PointerUnionTypeSelector::Return Ret; /// \endcode @@ -190,17 +190,17 @@ public: }; template -static bool operator==(PointerUnion lhs, PointerUnion rhs) { +bool operator==(PointerUnion lhs, PointerUnion rhs) { return lhs.getOpaqueValue() == rhs.getOpaqueValue(); } template -static bool operator!=(PointerUnion lhs, PointerUnion rhs) { +bool operator!=(PointerUnion lhs, PointerUnion rhs) { return lhs.getOpaqueValue() != rhs.getOpaqueValue(); } template -static bool operator<(PointerUnion lhs, PointerUnion rhs) { +bool operator<(PointerUnion lhs, PointerUnion rhs) { return lhs.getOpaqueValue() < rhs.getOpaqueValue(); } diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h index e519b5c07964ae104124c889c4a7a51dd99a4996..8fc08eb252eb214c6e041747f8cb454a3d5cf49b 100644 --- a/include/llvm/ADT/PostOrderIterator.h +++ b/include/llvm/ADT/PostOrderIterator.h @@ -268,6 +268,10 @@ inverse_post_order_ext(const T &G, SetType &S) { // with a postorder iterator to build the data structures). The moral of this // story is: Don't create more ReversePostOrderTraversal classes than necessary. // +// Because it does the traversal in its constructor, it won't invalidate when +// BasicBlocks are removed, *but* it may contain erased blocks. Some places +// rely on this behavior (i.e. GVN). 
+// // This class should be used like this: // { // ReversePostOrderTraversal RPOT(FuncPtr); // Expensive to create diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index ec121e0d55cd44bf3275959fca61837ce42712c7..15945adbe589a4634e2dbf2738866467db2b9a94 100644 --- a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -23,11 +23,13 @@ #include // for qsort #include #include +#include #include #include #include // for std::pair #include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Compiler.h" @@ -44,6 +46,10 @@ namespace detail { template using IterOfRange = decltype(std::begin(std::declval())); +template +using ValueOfRange = typename std::remove_reference()))>::type; + } // End detail namespace //===----------------------------------------------------------------------===// @@ -123,7 +129,7 @@ inline void deleter(T *Ptr) { //===----------------------------------------------------------------------===// // mapped_iterator - This is a simple iterator adapter that causes a function to -// be dereferenced whenever operator* is invoked on the iterator. +// be applied whenever operator* is invoked on the iterator. 
// template class mapped_iterator { @@ -134,9 +140,8 @@ public: iterator_category; typedef typename std::iterator_traits::difference_type difference_type; - typedef typename std::result_of< - UnaryFunc(decltype(*std::declval()))> - ::type value_type; + typedef decltype(std::declval()(*std::declval())) + value_type; typedef void pointer; //typedef typename UnaryFunc::result_type *pointer; @@ -356,65 +361,126 @@ template struct index_sequence; template struct index_sequence_for; namespace detail { -template class zip_first { -public: - typedef std::input_iterator_tag iterator_category; - typedef std::tuple())...> value_type; +using std::declval; + +// We have to alias this since inlining the actual type at the usage site +// in the parameter list of iterator_facade_base<> below ICEs MSVC 2017. +template struct ZipTupleType { + typedef std::tuple())...> type; +}; + +template +using zip_traits = iterator_facade_base< + ZipType, typename std::common_type::iterator_category...>::type, + // ^ TODO: Implement random access methods. + typename ZipTupleType::type, + typename std::iterator_traits>::type>::difference_type, + // ^ FIXME: This follows boost::make_zip_iterator's assumption that all + // inner iterators have the same difference_type. It would fail if, for + // instance, the second field's difference_type were non-numeric while the + // first is. 
+ typename ZipTupleType::type *, + typename ZipTupleType::type>; + +template +struct zip_common : public zip_traits { + using Base = zip_traits; + using value_type = typename Base::value_type; + std::tuple iterators; -private: - template value_type deres(index_sequence) { +protected: + template value_type deref(index_sequence) const { return value_type(*std::get(iterators)...); } - template decltype(iterators) tup_inc(index_sequence) { + template + decltype(iterators) tup_inc(index_sequence) const { return std::tuple(std::next(std::get(iterators))...); } + template + decltype(iterators) tup_dec(index_sequence) const { + return std::tuple(std::prev(std::get(iterators))...); + } + public: - value_type operator*() { return deres(index_sequence_for{}); } + zip_common(Iters &&... ts) : iterators(std::forward(ts)...) {} + + value_type operator*() { return deref(index_sequence_for{}); } - void operator++() { iterators = tup_inc(index_sequence_for{}); } + const value_type operator*() const { + return deref(index_sequence_for{}); + } - bool operator!=(const zip_first &other) const { - return std::get<0>(iterators) != std::get<0>(other.iterators); + ZipType &operator++() { + iterators = tup_inc(index_sequence_for{}); + return *reinterpret_cast(this); } - zip_first(Iters &&... ts) : iterators(std::forward(ts)...) {} + + ZipType &operator--() { + static_assert(Base::IsBidirectional, + "All inner iterators must be at least bidirectional."); + iterators = tup_dec(index_sequence_for{}); + return *reinterpret_cast(this); + } +}; + +template +struct zip_first : public zip_common, Iters...> { + using Base = zip_common, Iters...>; + + bool operator==(const zip_first &other) const { + return std::get<0>(this->iterators) == std::get<0>(other.iterators); + } + + zip_first(Iters &&... ts) : Base(std::forward(ts)...) 
{} }; -template class zip_shortest : public zip_first { +template +class zip_shortest : public zip_common, Iters...> { template - bool test(const zip_first &other, index_sequence) const { + bool test(const zip_shortest &other, index_sequence) const { return all_of(std::initializer_list{std::get(this->iterators) != std::get(other.iterators)...}, identity{}); } public: - bool operator!=(const zip_first &other) const { - return test(other, index_sequence_for{}); + using Base = zip_common, Iters...>; + + bool operator==(const zip_shortest &other) const { + return !test(other, index_sequence_for{}); } - zip_shortest(Iters &&... ts) - : zip_first(std::forward(ts)...) {} + + zip_shortest(Iters &&... ts) : Base(std::forward(ts)...) {} }; template