diff --git a/.gitignore b/.gitignore index 224bd2f3a9cf305cc4205f30d7742928de5f8b99..fd308878407aa8e0c6745b1a837a94e3fff0b3e0 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,8 @@ cscope.out autoconf/aclocal.m4 autoconf/autom4te.cache /compile_commands.json +# Visual Studio built-in CMake configuration +/CMakeSettings.json #==============================================================================# # Directories to ignore (do not add trailing '/'s, they skip symlinks). diff --git a/CMakeLists.txt b/CMakeLists.txt index 3e7a9dd8988f3990f048ad0cf3d2074df3bd7539..4ff0e6a90e571e23ab899b057a1e7141c24547d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,10 @@ if(POLICY CMP0068) set(CMAKE_BUILD_WITH_INSTALL_NAME_DIR ON) endif() +if(POLICY CMP0075) + cmake_policy(SET CMP0075 NEW) +endif() + if(NOT DEFINED LLVM_VERSION_MAJOR) set(LLVM_VERSION_MAJOR 8) endif() @@ -855,7 +859,7 @@ if( LLVM_INCLUDE_UTILS ) else() if ( LLVM_INCLUDE_TESTS ) message(FATAL_ERROR "Including tests when not building utils will not work. 
- Either set LLVM_INCLUDE_UTILS to On, or set LLVM_INCLDE_TESTS to Off.") + Either set LLVM_INCLUDE_UTILS to On, or set LLVM_INCLUDE_TESTS to Off.") endif() endif() @@ -974,7 +978,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) add_custom_target(llvm-headers DEPENDS intrinsics_gen) set_target_properties(llvm-headers PROPERTIES FOLDER "Misc") - if (NOT CMAKE_CONFIGURATION_TYPES) + if (NOT LLVM_ENABLE_IDE) add_llvm_install_targets(install-llvm-headers DEPENDS llvm-headers COMPONENT llvm-headers) @@ -984,7 +988,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) add_custom_target(llvm-libraries) set_target_properties(llvm-libraries PROPERTIES FOLDER "Misc") - if (NOT CMAKE_CONFIGURATION_TYPES) + if (NOT LLVM_ENABLE_IDE) add_llvm_install_targets(install-llvm-libraries DEPENDS llvm-libraries COMPONENT llvm-libraries) @@ -995,7 +999,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) list(REMOVE_DUPLICATES LLVM_LIBS) foreach(lib ${LLVM_LIBS}) add_dependencies(llvm-libraries ${lib}) - if (NOT CMAKE_CONFIGURATION_TYPES) + if (NOT LLVM_ENABLE_IDE) add_dependencies(install-llvm-libraries install-${lib}) endif() endforeach() @@ -1005,7 +1009,7 @@ endif() # This must be at the end of the LLVM root CMakeLists file because it must run # after all targets are created. if(LLVM_DISTRIBUTION_COMPONENTS) - if(CMAKE_CONFIGURATION_TYPES) + if(LLVM_ENABLE_IDE) message(FATAL_ERROR "LLVM_DISTRIBUTION_COMPONENTS cannot be specified with multi-configuration generators (i.e. Xcode or Visual Studio)") endif() diff --git a/CREDITS.TXT b/CREDITS.TXT index 7108051d67ab80cf0f9a31a4e1a004214c21c7a7..e279701f57d90bb182b24dfb48d88c4b64443843 100644 --- a/CREDITS.TXT +++ b/CREDITS.TXT @@ -492,8 +492,8 @@ D: Thread Local Storage implementation N: Bill Wendling I: wendling E: isanbard@gmail.com -D: Release manager, IR Linker, LTO -D: Bunches of stuff +D: Release manager, IR Linker, LTO. +D: Bunches of stuff. N: Bob Wilson E: bob.wilson@acm.org @@ -502,3 +502,11 @@ D: Advanced SIMD (NEON) support in the ARM backend. 
N: QingShan Zhang E: qshanz@cn.ibm.com D: PowerPC Backend Developer + +N: Li Jia He +E: hljhehlj@cn.ibm.com +D: PowerPC Backend Developer + +N: Zixuan Wu +E: wuzish@cn.ibm.com +D: PowerPC Backend Developer diff --git a/bindings/go/llvm/ir.go b/bindings/go/llvm/ir.go index 0f4877429cc11075a5f3d0a5a8a0e5d89f7f7a5a..1872a2ffe51092a31af4b685827d8ad0e6ffcb8d 100644 --- a/bindings/go/llvm/ir.go +++ b/bindings/go/llvm/ir.go @@ -739,7 +739,6 @@ func (v Value) IsAPHINode() (rv Value) { rv.C = C.LLVMIsAPHINode(v.C func (v Value) IsASelectInst() (rv Value) { rv.C = C.LLVMIsASelectInst(v.C); return } func (v Value) IsAShuffleVectorInst() (rv Value) { rv.C = C.LLVMIsAShuffleVectorInst(v.C); return } func (v Value) IsAStoreInst() (rv Value) { rv.C = C.LLVMIsAStoreInst(v.C); return } -func (v Value) IsATerminatorInst() (rv Value) { rv.C = C.LLVMIsATerminatorInst(v.C); return } func (v Value) IsABranchInst() (rv Value) { rv.C = C.LLVMIsABranchInst(v.C); return } func (v Value) IsAInvokeInst() (rv Value) { rv.C = C.LLVMIsAInvokeInst(v.C); return } func (v Value) IsAReturnInst() (rv Value) { rv.C = C.LLVMIsAReturnInst(v.C); return } @@ -1259,6 +1258,19 @@ func InlineAsm(t Type, asmString, constraints string, hasSideEffects, isAlignSta return } +// Operations on aggregates +func (v Value) Indices() []uint32 { + num := C.LLVMGetNumIndices(v.C) + indicesPtr := C.LLVMGetIndices(v.C) + // https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices + rawIndices := (*[1 << 30]C.uint)(unsafe.Pointer(indicesPtr))[:num:num] + indices := make([]uint32, num) + for i := range indices { + indices[i] = uint32(rawIndices[i]) + } + return indices +} + //------------------------------------------------------------------------- // llvm.Builder //------------------------------------------------------------------------- diff --git a/bindings/ocaml/llvm/llvm.mli b/bindings/ocaml/llvm/llvm.mli index 97b6a695fa24d65a43af782444eabfbabfaa93af..f12eb6efa61c797639d0d817ac17e4e04f79427b 100644 --- 
a/bindings/ocaml/llvm/llvm.mli +++ b/bindings/ocaml/llvm/llvm.mli @@ -1887,16 +1887,16 @@ val set_volatile : bool -> llvalue -> unit val is_terminator : llvalue -> bool (** [successor v i] returns the successor at index [i] for the value [v]. - See the method [llvm::TerminatorInst::getSuccessor]. *) + See the method [llvm::Instruction::getSuccessor]. *) val successor : llvalue -> int -> llbasicblock (** [set_successor v i o] sets the successor of the value [v] at the index [i] to the value [o]. - See the method [llvm::TerminatorInst::setSuccessor]. *) + See the method [llvm::Instruction::setSuccessor]. *) val set_successor : llvalue -> int -> llbasicblock -> unit (** [num_successors v] returns the number of successors for the value [v]. - See the method [llvm::TerminatorInst::getNumSuccessors]. *) + See the method [llvm::Instruction::getNumSuccessors]. *) val num_successors : llvalue -> int (** [successors v] returns the successors of [v]. *) diff --git a/bindings/ocaml/llvm/llvm_ocaml.c b/bindings/ocaml/llvm/llvm_ocaml.c index c637941d81d3a297ae764232182a71eeed512a55..cdf6c6a1206a620f7364761add4a7dbef441212b 100644 --- a/bindings/ocaml/llvm/llvm_ocaml.c +++ b/bindings/ocaml/llvm/llvm_ocaml.c @@ -483,9 +483,9 @@ CAMLprim value llvm_struct_set_body(LLVMTypeRef Ty, CAMLprim value llvm_struct_name(LLVMTypeRef Ty) { CAMLparam0(); + CAMLlocal1(result); const char *C = LLVMGetStructName(Ty); if (C) { - CAMLlocal1(result); result = caml_alloc_small(1, 0); Store_field(result, 0, caml_copy_string(C)); CAMLreturn(result); @@ -636,6 +636,7 @@ enum ValueKind { CAMLprim value llvm_classify_value(LLVMValueRef Val) { CAMLparam0(); + CAMLlocal1(result); if (!Val) CAMLreturn(Val_int(NullValue)); if (LLVMIsAConstant(Val)) { @@ -652,7 +653,6 @@ CAMLprim value llvm_classify_value(LLVMValueRef Val) { DEFINE_CASE(Val, ConstantVector); } if (LLVMIsAInstruction(Val)) { - CAMLlocal1(result); result = caml_alloc_small(1, 0); Store_field(result, 0, Val_int(LLVMGetInstructionOpcode(Val))); 
CAMLreturn(result); @@ -822,12 +822,11 @@ CAMLprim LLVMValueRef llvm_mdnull(LLVMContextRef C) { /* llvalue -> string option */ CAMLprim value llvm_get_mdstring(LLVMValueRef V) { CAMLparam0(); + CAMLlocal2(Option, Str); const char *S; unsigned Len; if ((S = LLVMGetMDString(V, &Len))) { - CAMLlocal2(Option, Str); - Str = caml_alloc_string(Len); memcpy(String_val(Str), S, Len); Option = alloc(1,0); diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 4dde95e30f30c6212efa301331fe10c1ed360cc2..189971655583a40532272d608d807212198b14be 100644 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -635,6 +635,7 @@ macro(add_llvm_library name) set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS_BUILDTREE_ONLY ${name}) else() if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY OR ${name} STREQUAL "LTO" OR + ${name} STREQUAL "OptRemarks" OR (LLVM_LINK_LLVM_DYLIB AND ${name} STREQUAL "LLVM")) set(install_dir lib${LLVM_LIBDIR_SUFFIX}) if(ARG_SHARED OR BUILD_SHARED_LIBS) @@ -659,7 +660,7 @@ macro(add_llvm_library name) ${install_type} DESTINATION ${install_dir} COMPONENT ${name}) - if (NOT CMAKE_CONFIGURATION_TYPES) + if (NOT LLVM_ENABLE_IDE) add_llvm_install_targets(install-${name} DEPENDS ${name} COMPONENT ${name}) @@ -890,7 +891,7 @@ macro(add_llvm_tool name) RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR} COMPONENT ${name}) - if (NOT CMAKE_CONFIGURATION_TYPES) + if (NOT LLVM_ENABLE_IDE) add_llvm_install_targets(install-${name} DEPENDS ${name} COMPONENT ${name}) @@ -928,7 +929,7 @@ macro(add_llvm_utility name) install (TARGETS ${name} RUNTIME DESTINATION ${LLVM_UTILS_INSTALL_DIR} COMPONENT ${name}) - if (NOT CMAKE_CONFIGURATION_TYPES) + if (NOT LLVM_ENABLE_IDE) add_llvm_install_targets(install-${name} DEPENDS ${name} COMPONENT ${name}) @@ -1409,7 +1410,7 @@ function(add_lit_testsuite target comment) endfunction() function(add_lit_testsuites project directory) - if (NOT CMAKE_CONFIGURATION_TYPES) + if (NOT LLVM_ENABLE_IDE) cmake_parse_arguments(ARG "" "" 
"PARAMS;DEPENDS;ARGS" ${ARGN}) # Search recursively for test directories by assuming anything not @@ -1468,7 +1469,7 @@ function(llvm_install_library_symlink name dest type) CODE "install_symlink(${full_name} ${full_dest} ${output_dir})" COMPONENT ${component}) - if (NOT CMAKE_CONFIGURATION_TYPES AND NOT ARG_ALWAYS_GENERATE) + if (NOT LLVM_ENABLE_IDE AND NOT ARG_ALWAYS_GENERATE) add_llvm_install_targets(install-${name} DEPENDS ${name} ${dest} install-${dest} COMPONENT ${name}) @@ -1501,7 +1502,7 @@ function(llvm_install_symlink name dest) CODE "install_symlink(${full_name} ${full_dest} ${LLVM_TOOLS_INSTALL_DIR})" COMPONENT ${component}) - if (NOT CMAKE_CONFIGURATION_TYPES AND NOT ARG_ALWAYS_GENERATE) + if (NOT LLVM_ENABLE_IDE AND NOT ARG_ALWAYS_GENERATE) add_llvm_install_targets(install-${name} DEPENDS ${name} ${dest} install-${dest} COMPONENT ${name}) diff --git a/cmake/modules/CMakeLists.txt b/cmake/modules/CMakeLists.txt index 6c316a2f04fb5f5cbca980b2a2c4cc3abeb38c87..f5cc0006fa06ad5fc2a11707be1d3dac062013d4 100644 --- a/cmake/modules/CMakeLists.txt +++ b/cmake/modules/CMakeLists.txt @@ -132,7 +132,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) PATTERN LLVM-Config.cmake EXCLUDE PATTERN GetHostTriple.cmake EXCLUDE) - if (NOT CMAKE_CONFIGURATION_TYPES) + if (NOT LLVM_ENABLE_IDE) # Add a dummy target so this can be used with LLVM_DISTRIBUTION_COMPONENTS add_custom_target(cmake-exports) add_llvm_install_targets(install-cmake-exports diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index 2c9bd14ad054cc6cf1e20961aa5b8e39e342d503..b590f768244540ffc696a12f73944dadc275fec8 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -23,7 +23,7 @@ string(TOUPPER "${LLVM_ENABLE_LTO}" uppercase_LLVM_ENABLE_LTO) # Ninja Job Pool support # The following only works with the Ninja generator in CMake >= 3.0. 
set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING - "Define the maximum number of concurrent compilation jobs.") + "Define the maximum number of concurrent compilation jobs (Ninja only).") if(LLVM_PARALLEL_COMPILE_JOBS) if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja") message(WARNING "Job pooling is only available with Ninja generators.") @@ -34,7 +34,7 @@ if(LLVM_PARALLEL_COMPILE_JOBS) endif() set(LLVM_PARALLEL_LINK_JOBS "" CACHE STRING - "Define the maximum number of concurrent link jobs.") + "Define the maximum number of concurrent link jobs (Ninja only).") if(CMAKE_MAKE_PROGRAM MATCHES "ninja") if(NOT LLVM_PARALLEL_LINK_JOBS AND uppercase_LLVM_ENABLE_LTO STREQUAL "THIN") message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") @@ -224,6 +224,10 @@ if(NOT WIN32 AND NOT CYGWIN) append_if(SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG "-fvisibility-inlines-hidden" CMAKE_CXX_FLAGS) endif() +if(CMAKE_SIZEOF_VOID_P EQUAL 8 AND MINGW) + add_definitions( -D_FILE_OFFSET_BITS=64 ) +endif() + if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) # TODO: support other platforms and toolchains. if( LLVM_BUILD_32_BITS ) @@ -576,6 +580,7 @@ if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL)) append("-Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) endif() + add_flag_if_supported("-Wimplicit-fallthrough" IMPLICIT_FALLTHROUGH_FLAG) add_flag_if_supported("-Wcovered-switch-default" COVERED_SWITCH_DEFAULT_FLAG) append_if(USE_NO_UNINITIALIZED "-Wno-uninitialized" CMAKE_CXX_FLAGS) append_if(USE_NO_MAYBE_UNINITIALIZED "-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS) @@ -868,12 +873,19 @@ else() set(LLVM_ENABLE_PLUGINS ON) endif() +# By default we should enable LLVM_ENABLE_IDE only for multi-configuration +# generators. This option disables optional build system features that make IDEs +# less usable. 
set(LLVM_ENABLE_IDE_default OFF) -if (XCODE OR MSVC_IDE OR CMAKE_EXTRA_GENERATOR) +if (CMAKE_CONFIGURATION_TYPES) set(LLVM_ENABLE_IDE_default ON) endif() -option(LLVM_ENABLE_IDE "Generate targets and process sources for use with an IDE" - ${LLVM_ENABLE_IDE_default}) +option(LLVM_ENABLE_IDE + "Disable optional build system features that cause problems for IDE generators" + ${LLVM_ENABLE_IDE_default}) +if (CMAKE_CONFIGURATION_TYPES AND NOT LLVM_ENABLE_IDE) + message(WARNING "Disabling LLVM_ENABLE_IDE on multi-configuration generators is not recommended.") +endif() function(get_compile_definitions) get_directory_property(top_dir_definitions DIRECTORY ${CMAKE_SOURCE_DIR} COMPILE_DEFINITIONS) diff --git a/cmake/modules/LLVMProcessSources.cmake b/cmake/modules/LLVMProcessSources.cmake index f65f31d797cf6c0384df64ec95af7b0222455748..7cbd2863500cf1ef606db7b16c474580f51d3147 100644 --- a/cmake/modules/LLVMProcessSources.cmake +++ b/cmake/modules/LLVMProcessSources.cmake @@ -52,16 +52,15 @@ function(llvm_process_sources OUT_VAR) cmake_parse_arguments(ARG "" "" "ADDITIONAL_HEADERS;ADDITIONAL_HEADER_DIRS" ${ARGN}) set(sources ${ARG_UNPARSED_ARGUMENTS}) llvm_check_source_file_list( ${sources} ) - if( LLVM_ENABLE_IDE ) - # This adds .td and .h files to the Visual Studio solution: - add_td_sources(sources) - find_all_header_files(hdrs "${ARG_ADDITIONAL_HEADER_DIRS}") - if (hdrs) - set_source_files_properties(${hdrs} PROPERTIES HEADER_FILE_ONLY ON) - endif() - set_source_files_properties(${ARG_ADDITIONAL_HEADERS} PROPERTIES HEADER_FILE_ONLY ON) - list(APPEND sources ${ARG_ADDITIONAL_HEADERS} ${hdrs}) + + # This adds .td and .h files to the Visual Studio solution: + add_td_sources(sources) + find_all_header_files(hdrs "${ARG_ADDITIONAL_HEADER_DIRS}") + if (hdrs) + set_source_files_properties(${hdrs} PROPERTIES HEADER_FILE_ONLY ON) endif() + set_source_files_properties(${ARG_ADDITIONAL_HEADERS} PROPERTIES HEADER_FILE_ONLY ON) + list(APPEND sources ${ARG_ADDITIONAL_HEADERS} ${hdrs}) 
set( ${OUT_VAR} ${sources} PARENT_SCOPE ) endfunction(llvm_process_sources) diff --git a/docs/AMDGPUUsage.rst b/docs/AMDGPUUsage.rst index 1ddda1bae9ec5b1aba10329591d7936a46d364a7..03685f9e352834d826da2f6196fbe0f0b7bc3f27 100644 --- a/docs/AMDGPUUsage.rst +++ b/docs/AMDGPUUsage.rst @@ -78,139 +78,143 @@ names from both the *Processor* and *Alternative Processor* can be used. .. table:: AMDGPU Processors :name: amdgpu-processor-table - =========== =============== ============ ===== ========= ======= ================== - Processor Alternative Target dGPU/ Target ROCm Example - Processor Triple APU Features Support Products + =========== =============== ============ ===== ========== ======= ====================== + Processor Alternative Target dGPU/ Target ROCm Example + Processor Triple APU Features Support Products Architecture Supported [Default] - =========== =============== ============ ===== ========= ======= ================== + =========== =============== ============ ===== ========== ======= ====================== **Radeon HD 2000/3000 Series (R600)** [AMD-RADEON-HD-2000-3000]_ - ----------------------------------------------------------------------------------- + ---------------------------------------------------------------------------------------- ``r600`` ``r600`` dGPU ``r630`` ``r600`` dGPU ``rs880`` ``r600`` dGPU ``rv670`` ``r600`` dGPU **Radeon HD 4000 Series (R700)** [AMD-RADEON-HD-4000]_ - ----------------------------------------------------------------------------------- + ---------------------------------------------------------------------------------------- ``rv710`` ``r600`` dGPU ``rv730`` ``r600`` dGPU ``rv770`` ``r600`` dGPU **Radeon HD 5000 Series (Evergreen)** [AMD-RADEON-HD-5000]_ - ----------------------------------------------------------------------------------- + ---------------------------------------------------------------------------------------- ``cedar`` ``r600`` dGPU ``cypress`` ``r600`` dGPU ``juniper`` ``r600`` dGPU 
``redwood`` ``r600`` dGPU ``sumo`` ``r600`` dGPU **Radeon HD 6000 Series (Northern Islands)** [AMD-RADEON-HD-6000]_ - ----------------------------------------------------------------------------------- + ---------------------------------------------------------------------------------------- ``barts`` ``r600`` dGPU ``caicos`` ``r600`` dGPU ``cayman`` ``r600`` dGPU ``turks`` ``r600`` dGPU **GCN GFX6 (Southern Islands (SI))** [AMD-GCN-GFX6]_ - ----------------------------------------------------------------------------------- + ---------------------------------------------------------------------------------------- ``gfx600`` - ``tahiti`` ``amdgcn`` dGPU ``gfx601`` - ``hainan`` ``amdgcn`` dGPU - ``oland`` - ``pitcairn`` - ``verde`` **GCN GFX7 (Sea Islands (CI))** [AMD-GCN-GFX7]_ - ----------------------------------------------------------------------------------- - ``gfx700`` - ``kaveri`` ``amdgcn`` APU - A6-7000 - - A6 Pro-7050B - - A8-7100 - - A8 Pro-7150B - - A10-7300 - - A10 Pro-7350B - - FX-7500 - - A8-7200P - - A10-7400P - - FX-7600P - ``gfx701`` - ``hawaii`` ``amdgcn`` dGPU ROCm - FirePro W8100 - - FirePro W9100 - - FirePro S9150 - - FirePro S9170 - ``gfx702`` ``amdgcn`` dGPU ROCm - Radeon R9 290 - - Radeon R9 290x - - Radeon R390 - - Radeon R390x - ``gfx703`` - ``kabini`` ``amdgcn`` APU - E1-2100 - - ``mullins`` - E1-2200 - - E1-2500 - - E2-3000 - - E2-3800 - - A4-5000 - - A4-5100 - - A6-5200 - - A4 Pro-3340B - ``gfx704`` - ``bonaire`` ``amdgcn`` dGPU - Radeon HD 7790 - - Radeon HD 8770 - - R7 260 - - R7 260X + ---------------------------------------------------------------------------------------- + ``gfx700`` - ``kaveri`` ``amdgcn`` APU - A6-7000 + - A6 Pro-7050B + - A8-7100 + - A8 Pro-7150B + - A10-7300 + - A10 Pro-7350B + - FX-7500 + - A8-7200P + - A10-7400P + - FX-7600P + ``gfx701`` - ``hawaii`` ``amdgcn`` dGPU ROCm - FirePro W8100 + - FirePro W9100 + - FirePro S9150 + - FirePro S9170 + ``gfx702`` ``amdgcn`` dGPU ROCm - Radeon R9 290 + - Radeon R9 290x + 
- Radeon R390 + - Radeon R390x + ``gfx703`` - ``kabini`` ``amdgcn`` APU - E1-2100 + - ``mullins`` - E1-2200 + - E1-2500 + - E2-3000 + - E2-3800 + - A4-5000 + - A4-5100 + - A6-5200 + - A4 Pro-3340B + ``gfx704`` - ``bonaire`` ``amdgcn`` dGPU - Radeon HD 7790 + - Radeon HD 8770 + - R7 260 + - R7 260X **GCN GFX8 (Volcanic Islands (VI))** [AMD-GCN-GFX8]_ - ----------------------------------------------------------------------------------- - ``gfx801`` - ``carrizo`` ``amdgcn`` APU - xnack - A6-8500P - [on] - Pro A6-8500B - - A8-8600P - - Pro A8-8600B - - FX-8800P - - Pro A12-8800B - \ ``amdgcn`` APU - xnack ROCm - A10-8700P - [on] - Pro A10-8700B - - A10-8780P - \ ``amdgcn`` APU - xnack - A10-9600P - [on] - A10-9630P - - A12-9700P - - A12-9730P - - FX-9800P - - FX-9830P - \ ``amdgcn`` APU - xnack - E2-9010 - [on] - A6-9210 - - A9-9410 - ``gfx802`` - ``iceland`` ``amdgcn`` dGPU - xnack ROCm - FirePro S7150 - - ``tonga`` [off] - FirePro S7100 - - FirePro W7100 - - Radeon R285 - - Radeon R9 380 - - Radeon R9 385 - - Mobile FirePro - M7170 - ``gfx803`` - ``fiji`` ``amdgcn`` dGPU - xnack ROCm - Radeon R9 Nano - [off] - Radeon R9 Fury - - Radeon R9 FuryX - - Radeon Pro Duo - - FirePro S9300x2 - - Radeon Instinct MI8 - \ - ``polaris10`` ``amdgcn`` dGPU - xnack ROCm - Radeon RX 470 - [off] - Radeon RX 480 - - Radeon Instinct MI6 - \ - ``polaris11`` ``amdgcn`` dGPU - xnack ROCm - Radeon RX 460 + ---------------------------------------------------------------------------------------- + ``gfx801`` - ``carrizo`` ``amdgcn`` APU - xnack - A6-8500P + [on] - Pro A6-8500B + - A8-8600P + - Pro A8-8600B + - FX-8800P + - Pro A12-8800B + \ ``amdgcn`` APU - xnack ROCm - A10-8700P + [on] - Pro A10-8700B + - A10-8780P + \ ``amdgcn`` APU - xnack - A10-9600P + [on] - A10-9630P + - A12-9700P + - A12-9730P + - FX-9800P + - FX-9830P + \ ``amdgcn`` APU - xnack - E2-9010 + [on] - A6-9210 + - A9-9410 + ``gfx802`` - ``iceland`` ``amdgcn`` dGPU - xnack ROCm - FirePro S7150 + - ``tonga`` [off] - FirePro 
S7100 + - FirePro W7100 + - Radeon R285 + - Radeon R9 380 + - Radeon R9 385 + - Mobile FirePro + M7170 + ``gfx803`` - ``fiji`` ``amdgcn`` dGPU - xnack ROCm - Radeon R9 Nano + [off] - Radeon R9 Fury + - Radeon R9 FuryX + - Radeon Pro Duo + - FirePro S9300x2 + - Radeon Instinct MI8 + \ - ``polaris10`` ``amdgcn`` dGPU - xnack ROCm - Radeon RX 470 + [off] - Radeon RX 480 + - Radeon Instinct MI6 + \ - ``polaris11`` ``amdgcn`` dGPU - xnack ROCm - Radeon RX 460 [off] ``gfx810`` - ``stoney`` ``amdgcn`` APU - xnack [on] **GCN GFX9** [AMD-GCN-GFX9]_ - ----------------------------------------------------------------------------------- - ``gfx900`` ``amdgcn`` dGPU - xnack ROCm - Radeon Vega - [off] Frontier Edition - - Radeon RX Vega 56 - - Radeon RX Vega 64 - - Radeon RX Vega 64 - Liquid - - Radeon Instinct MI25 - ``gfx902`` ``amdgcn`` APU - xnack - Ryzen 3 2200G - [on] - Ryzen 5 2400G - ``gfx904`` ``amdgcn`` dGPU - xnack *TBA* + ---------------------------------------------------------------------------------------- + ``gfx900`` ``amdgcn`` dGPU - xnack ROCm - Radeon Vega + [off] Frontier Edition + - Radeon RX Vega 56 + - Radeon RX Vega 64 + - Radeon RX Vega 64 + Liquid + - Radeon Instinct MI25 + ``gfx902`` ``amdgcn`` APU - xnack - Ryzen 3 2200G + [on] - Ryzen 5 2400G + ``gfx904`` ``amdgcn`` dGPU - xnack *TBA* [off] - .. TODO - Add product - names. - ``gfx906`` ``amdgcn`` dGPU - xnack *TBA* - [off] - .. TODO - Add product - names. - =========== =============== ============ ===== ========= ======= ================== + .. TODO + Add product + names. + ``gfx906`` ``amdgcn`` dGPU - xnack - Radeon Instinct MI50 + [off] - Radeon Instinct MI60 + sram-ecc + [on] + ``gfx909`` ``amdgcn`` APU - xnack *TBA* (Raven Ridge 2) + [on] + .. TODO + Add product + names. + =========== =============== ============ ===== ========== ======= ====================== .. _amdgpu-target-features: @@ -241,24 +245,26 @@ For example: .. 
table:: AMDGPU Target Features :name: amdgpu-target-feature-table - ============== ================================================== - Target Feature Description - ============== ================================================== - -m[no-]xnack Enable/disable generating code that has - memory clauses that are compatible with - having XNACK replay enabled. - - This is used for demand paging and page - migration. If XNACK replay is enabled in - the device, then if a page fault occurs - the code may execute incorrectly if the - ``xnack`` feature is not enabled. Executing - code that has the feature enabled on a - device that does not have XNACK replay - enabled will execute correctly, but may - be less performant than code with the - feature disabled. - ============== ================================================== + =============== ================================================== + Target Feature Description + =============== ================================================== + -m[no-]xnack Enable/disable generating code that has + memory clauses that are compatible with + having XNACK replay enabled. + + This is used for demand paging and page + migration. If XNACK replay is enabled in + the device, then if a page fault occurs + the code may execute incorrectly if the + ``xnack`` feature is not enabled. Executing + code that has the feature enabled on a + device that does not have XNACK replay + enabled will execute correctly, but may + be less performant than code with the + feature disabled. + -m[no-]sram-ecc Enable/disable generating code that assumes SRAM + ECC is enabled/disabled. + =============== ================================================== .. _amdgpu-address-spaces: @@ -544,6 +550,17 @@ The AMDGPU backend uses the following ELF header: be 0. See :ref:`amdgpu-target-features`. + ``EF_AMDGPU_SRAM_ECC`` 0x00000200 Indicates if the ``sram-ecc`` + target feature is + enabled for all code + contained in the code object. 
+ If the processor + does not support the + ``sram-ecc`` target + feature then must + be 0. + See + :ref:`amdgpu-target-features`. ================================= ========== ============================= .. table:: AMDGPU ``EF_AMDGPU_MACH`` Values @@ -589,6 +606,7 @@ The AMDGPU backend uses the following ELF header: ``EF_AMDGPU_MACH_AMDGCN_GFX904`` 0x02e ``gfx904`` ``EF_AMDGPU_MACH_AMDGCN_GFX906`` 0x02f ``gfx906`` *reserved* 0x030 Reserved. + ``EF_AMDGPU_MACH_AMDGCN_GFX909`` 0x031 ``gfx909`` ================================= ========== ============================= Sections diff --git a/docs/AdvancedBuilds.rst b/docs/AdvancedBuilds.rst index c559bdeb28023288365df561af3f74a09f6aa257..d2a2ef58b23e4c85a6bea3a2f16baa9a49e36afa 100644 --- a/docs/AdvancedBuilds.rst +++ b/docs/AdvancedBuilds.rst @@ -41,6 +41,16 @@ This command itself isn't terribly useful because it assumes default configurations for each stage. The next series of examples utilize CMake cache scripts to provide more complex options. +By default, only a few CMake options will be passed between stages. +The list, called _BOOTSTRAP_DEFAULT_PASSTHROUGH, is defined in clang/CMakeLists.txt. +To force the passing of the variables between stages, use the -DCLANG_BOOTSTRAP_PASSTHROUGH +CMake option, each variable separated by a ";". For example: + +.. code-block:: console + + $ cmake -G Ninja -DCLANG_ENABLE_BOOTSTRAP=On -DCLANG_BOOTSTRAP_PASSTHROUGH="CMAKE_INSTALL_PREFIX;CMAKE_VERBOSE_MAKEFILE" + $ ninja stage2 + The clang build system refers to builds as stages. A stage1 build is a standard build using the compiler installed on the host, and a stage2 build is built using the stage1 compiler. This nomenclature holds up to more stages too. In
In diff --git a/docs/BranchWeightMetadata.rst b/docs/BranchWeightMetadata.rst index 9bd8bd4ae744afdfe446e90682cd3c505d01a72f..e09587179ec3b56bbea10b818f628295101314b8 100644 --- a/docs/BranchWeightMetadata.rst +++ b/docs/BranchWeightMetadata.rst @@ -9,10 +9,10 @@ Introduction ============ Branch Weight Metadata represents branch weights as its likeliness to be taken -(see :doc:`BlockFrequencyTerminology`). Metadata is assigned to the -``TerminatorInst`` as a ``MDNode`` of the ``MD_prof`` kind. The first operator -is always a ``MDString`` node with the string "branch_weights". Number of -operators depends on the terminator type. +(see :doc:`BlockFrequencyTerminology`). Metadata is assigned to an +``Instruction`` that is a terminator as a ``MDNode`` of the ``MD_prof`` kind. +The first operator is always a ``MDString`` node with the string +"branch_weights". Number of operators depends on the terminator type. Branch weights might be fetch from the profiling file, or generated based on `__builtin_expect`_ instruction. diff --git a/docs/BugLifeCycle.rst b/docs/BugLifeCycle.rst new file mode 100644 index 0000000000000000000000000000000000000000..c74aa1d3a62bc0e8752915a44b8988101caa5393 --- /dev/null +++ b/docs/BugLifeCycle.rst @@ -0,0 +1,140 @@ +=================== +LLVM Bug Life Cycle +=================== + +.. contents:: + :local: + + + +Introduction - Achieving consistency in how we deal with bug reports +==================================================================== + +We aim to achieve a basic level of consistency in how reported bugs evolve from +being reported, to being worked on, and finally getting closed out. The +consistency helps reporters, developers and others to gain a better +understanding of what a particular bug state actually means and what to expect +might happen next. 
+ +At the same time, we aim to not over-specify the life cycle of bugs in +`the LLVM Bug Tracking System `_, as the +overall goal is to make it easier to work with and understand the bug reports. + +The main parts of the life cycle documented here are: + +#. `Reporting`_ +#. `Triaging`_ +#. `Actively working on fixing`_ +#. `Closing`_ + +Furthermore, some of the metadata in the bug tracker, such as who to notify on +newly reported bugs or what breakdown into products & components we use, +needs to be maintained. See the following for details: + +#. `Maintenance of Bug products/component metadata`_ +#. `Maintenance of cc-by-default settings`_ + + +.. _Reporting: + +Reporting bugs +============== + +See :doc:`HowToSubmitABug` for further details on how to submit good bug reports. + +Make sure that you have one or more people on cc on the bug report that you +think will react to it. We aim to automatically add specific people on cc for +most products/components, but may not always succeed in doing so. + +If you know the area of LLVM code the root cause of the bug is in, good +candidates to add as cc may be the same people you'd ask for a code review in +that area. See :ref:`finding-potential-reviewers` for more details. + + +.. _Triaging: + +Triaging bugs +============= + +Bugs with status NEW indicate that they still need to be triaged. +When triage is complete, the status of the bug is moved to CONFIRMED. + +The goal of triaging a bug is to make sure a newly reported bug ends up in a +good, actionable, state. Try to answer the following questions while triaging. + +* Is the reported behavior actually wrong? + + * E.g. does a miscompile example depend on undefined behavior? + +* Can you easily reproduce the bug? + + * If not, are there reasonable excuses why it cannot easily be reproduced? + +* Is it related to an already reported bug? + + * Use the "See also"/"depends on"/"blocks" fields if so.
+ * Close it as a duplicate if so, pointing to the issue it duplicates. + +* Are the following fields filled in correctly? + + * Product + * Component + * Title + +* CC others not already cc’ed that you happen to know would be good to pull in. +* Add the "beginner" keyword if you think this would be a good bug to be fixed + by someone new to LLVM. + +.. _Actively working on fixing: + +Actively working on fixing bugs +=============================== + +Please remember to assign the bug to yourself if you're actively working on +fixing it and to unassign it when you're no longer actively working on it. You +unassign a bug by setting the Assignee field to "unassignedbugs@nondot.org". + +.. _Closing: + +Resolving/Closing bugs +====================== + +For simplicity, we only have 1 status for all resolved or closed bugs: +RESOLVED. + +Resolving bugs is good! Make sure to properly record the reason for resolving. +Examples of reasons for resolving are: + +* Revision NNNNNN fixed the bug. +* The bug cannot be reproduced with revision NNNNNN. +* The circumstances for the bug don't apply anymore. +* There is a sound reason for not fixing it (WONTFIX). +* There is a specific and plausible reason to think that a given bug is + otherwise inapplicable or obsolete. + + * One example is an old open bug that doesn't contain enough information to + clearly understand the problem being reported (e.g. not reproducible). It is + fine to resolve such a bug e.g. with resolution WORKSFORME and leaving a + comment to encourage the reporter to reopen the bug with more information + if it's still reproducible on their end. + +If a bug is resolved, please fill in the revision number it was fixed in in the +"Fixed by Commit(s)" field. + + +.. 
_Maintenance of Bug products/component metadata: + +Maintenance of products/components metadata +=========================================== + +Please raise a bug against "Bugzilla Admin"/"Products" to request any changes +to be made to the breakdown of products & components modeled in Bugzilla. + + +.. _Maintenance of cc-by-default settings: + +Maintenance of cc-by-default settings +===================================== + +Please raise a bug against "Bugzilla Admin"/"Products" to request any changes +to be made to the cc-by-default settings for specific components. diff --git a/docs/CommandGuide/FileCheck.rst b/docs/CommandGuide/FileCheck.rst index 75df8a6226893050345619bf7c0db90d610ad449..6581b33ba1c716ccc6e0f53c39b5b2fb3745907e 100644 --- a/docs/CommandGuide/FileCheck.rst +++ b/docs/CommandGuide/FileCheck.rst @@ -24,6 +24,9 @@ match. The file to verify is read from standard input unless the OPTIONS ------- +Options are parsed from the environment variable ``FILECHECK_OPTS`` +and from the command line. + .. option:: -help Print a summary of command line options. @@ -116,6 +119,10 @@ OPTIONS as old tests are migrated to the new non-overlapping ``CHECK-DAG:`` implementation. +.. option:: --color + + Use colors in output (autodetected by default). + EXIT STATUS ----------- diff --git a/docs/CommandGuide/llvm-exegesis.rst b/docs/CommandGuide/llvm-exegesis.rst index 4181a9987213eec01094d92722c429bd34ffc5c9..f27db9e57edc6c4a8d93764ba39178eb6e094911 100644 --- a/docs/CommandGuide/llvm-exegesis.rst +++ b/docs/CommandGuide/llvm-exegesis.rst @@ -175,9 +175,10 @@ OPTIONS Specify the opcode to measure, by index. See example 1 for details. Either `opcode-index`, `opcode-name` or `snippets-file` must be set. -.. option:: -opcode-name= +.. option:: -opcode-name=,,... - Specify the opcode to measure, by name. See example 1 for details. + Specify the opcode to measure, by name. Several opcodes can be specified as + a comma-separated list. See example 1 for details. 
Either `opcode-index`, `opcode-name` or `snippets-file` must be set. .. option:: -snippets-file= @@ -223,6 +224,10 @@ OPTIONS If set, ignore instructions that do not have a sched class (class idx = 0). + .. option:: -mcpu= + + If set, measure the cpu characteristics using the counters for this CPU. This + is useful when creating new sched models (the host CPU is unknown to LLVM). EXIT STATUS ----------- diff --git a/docs/CommandGuide/tblgen.rst b/docs/CommandGuide/tblgen.rst index 55b542948469d1d9f256497c59f1d631e285a53a..3105e0c8076710abdcdbaa2ca800150b8c837e9b 100644 --- a/docs/CommandGuide/tblgen.rst +++ b/docs/CommandGuide/tblgen.rst @@ -130,6 +130,10 @@ OPTIONS Generate enhanced disassembly info. +.. option:: -gen-exegesis + + Generate llvm-exegesis tables. + .. option:: -version Show the version number of this program. diff --git a/docs/DeveloperPolicy.rst b/docs/DeveloperPolicy.rst index 4eda6c77b9f7ea239b17c6dbb50f278b749566e4..9125197a73a86a16a20036d5882ae915971a3690 100644 --- a/docs/DeveloperPolicy.rst +++ b/docs/DeveloperPolicy.rst @@ -730,10 +730,6 @@ code already distributed under a more liberal license (like the UIUC license), and GPL-containing subprojects are kept in separate SVN repositories whose LICENSE.txt files specifically indicate that they contain GPL code. -We have no plans to change the license of LLVM. If you have questions or -comments about the license, please contact the `LLVM Developer's Mailing -List `_. 
- Patents ------- diff --git a/docs/HowToBuildWithPGO.rst b/docs/HowToBuildWithPGO.rst new file mode 100644 index 0000000000000000000000000000000000000000..ba93bc64a294ab9b2cdd1763160620336bf6070a --- /dev/null +++ b/docs/HowToBuildWithPGO.rst @@ -0,0 +1,163 @@ +============================================================= +How To Build Clang and LLVM with Profile-Guided Optimizations +============================================================= + +Introduction +============ + +PGO (Profile-Guided Optimization) allows your compiler to better optimize code +for how it actually runs. Users report that applying this to Clang and LLVM can +decrease overall compile time by 20%. + +This guide walks you through how to build Clang with PGO, though it also applies +to other subprojects, such as LLD. + + +Using the script +================ + +We have a script at ``utils/collect_and_build_with_pgo.py``. This script is +tested on a few Linux flavors, and requires a checkout of LLVM, Clang, and +compiler-rt. Despite the name, it performs four clean builds of Clang, so it +can take a while to run to completion. Please see the script's ``--help`` for +more information on how to run it, and the different options available to you. +If you want to get the most out of PGO for a particular use-case (e.g. compiling +a specific large piece of software), please do read the section below on +'benchmark' selection. + +Please note that this script is only tested on a few Linux distros. Patches to +add support for other platforms, as always, are highly appreciated. :) + +This script also supports a ``--dry-run`` option, which causes it to print +important commands instead of running them. + + +Selecting 'benchmarks' +====================== + +PGO does best when the profiles gathered represent how the user plans to use the +compiler. Notably, highly accurate profiles of llc building x86_64 code aren't +incredibly helpful if you're going to be targeting ARM.
+ +By default, the script above does two things to get solid coverage. It: + +- runs all of Clang and LLVM's lit tests, and +- uses the instrumented Clang to build Clang, LLVM, and all of the other + LLVM subprojects available to it. + +Together, these should give you: + +- solid coverage of building C++, +- good coverage of building C, +- great coverage of running optimizations, +- great coverage of the backend for your host's architecture, and +- some coverage of other architectures (if other arches are supported backends). + +Altogether, this should cover a diverse set of uses for Clang and LLVM. If you +have very specific needs (e.g. your compiler is meant to compile a large browser +for four different platforms, or similar), you may want to do something else. +This is configurable in the script itself. + + +Building Clang with PGO +======================= + +If you prefer to not use the script, this briefly goes over how to build +Clang/LLVM with PGO. + +First, you should have at least LLVM, Clang, and compiler-rt checked out +locally. + +Next, at a high level, you're going to need to do the following: + +1. Build a standard Release Clang and the relevant libclang_rt.profile library +2. Build Clang using the Clang you built above, but with instrumentation +3. Use the instrumented Clang to generate profiles, which consists of two steps: + + - Running the instrumented Clang/LLVM/lld/etc. on tasks that represent how + users will use said tools. + - Using a tool to convert the "raw" profiles generated above into a single, + final PGO profile. + +4. Build a final release Clang (along with whatever other binaries you need) + using the profile collected from your benchmark + +In more detailed steps: + +1. Configure a Clang build as you normally would. It's highly recommended that + you use the Release configuration for this, since it will be used to build + another Clang. Because you need Clang and supporting libraries, you'll want + to build the ``all`` target (e.g. 
``ninja all`` or ``make -j4 all``). + +2. Configure a Clang build as above, but add the following CMake args: + + - ``-DLLVM_BUILD_INSTRUMENTED=IR`` -- This causes us to build everything + with instrumentation. + - ``-DLLVM_BUILD_RUNTIME=No`` -- A few projects have bad interactions when + built with profiling, and aren't necessary to build. This flag turns them + off. + - ``-DCMAKE_C_COMPILER=/path/to/stage1/clang`` - Use the Clang we built in + step 1. + - ``-DCMAKE_CXX_COMPILER=/path/to/stage1/clang++`` - Same as above. + + In this build directory, you simply need to build the ``clang`` target (and + whatever supporting tooling your benchmark requires). + +3. As mentioned above, this has two steps: gathering profile data, and then + massaging it into a useful form: + + a. Build your benchmark using the Clang generated in step 2. The 'standard' + benchmark recommended is to run ``check-clang`` and ``check-llvm`` in your + instrumented Clang's build directory, and to do a full build of Clang/LLVM + using your instrumented Clang. So, create yet another build directory, + with the following CMake arguments: + + - ``-DCMAKE_C_COMPILER=/path/to/stage2/clang`` - Use the Clang we built in + step 2. + - ``-DCMAKE_CXX_COMPILER=/path/to/stage2/clang++`` - Same as above. + + If your users are fans of debug info, you may want to consider using + ``-DCMAKE_BUILD_TYPE=RelWithDebInfo`` instead of + ``-DCMAKE_BUILD_TYPE=Release``. This will grant better coverage of + debug info pieces of clang, but will take longer to complete and will + result in a much larger build directory. + + It's recommended to build the ``all`` target with your instrumented Clang, + since more coverage is often better. + + b. You should now have a few ``*.profraw`` files in + ``path/to/stage2/profiles/``. You need to merge these using + ``llvm-profdata`` (even if you only have one! The profile merge transforms + profraw into actual profile data, as well).
This can be done with + ``/path/to/stage1/llvm-profdata -merge + -output=/path/to/output/profdata.prof path/to/stage2/profiles/*.profraw``. + +4. Now, build your final, PGO-optimized Clang. To do this, you'll want to pass + the following additional arguments to CMake. + + - ``-DLLVM_PROFDATA_FILE=/path/to/output/profdata.prof`` - Use the PGO + profile from the previous step. + - ``-DCMAKE_C_COMPILER=/path/to/stage1/clang`` - Use the Clang we built in + step 1. + - ``-DCMAKE_CXX_COMPILER=/path/to/stage1/clang++`` - Same as above. + + From here, you can build whatever targets you need. + + .. note:: + You may see warnings about a mismatched profile in the build output. These + are generally harmless. To silence them, you can add + ``-DCMAKE_C_FLAGS='-Wno-backend-plugin' + -DCMAKE_CXX_FLAGS='-Wno-backend-plugin'`` to your CMake invocation. + + +Congrats! You now have a Clang built with profile-guided optimizations, and you +can delete all but the final build directory if you'd like. + +If this worked well for you and you plan on doing it often, there's a slight +optimization that can be made: LLVM and Clang have a tool called tblgen that's +built and run during the build process. While it's potentially nice to build +this for coverage as part of step 3, none of your other builds should benefit +from building it. You can pass the CMake options +``-DCLANG_TABLEGEN=/path/to/stage1/bin/clang-tblgen +-DLLVM_TABLEGEN=/path/to/stage1/bin/llvm-tblgen`` to steps 2 and onward to avoid +these useless rebuilds. diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 9fcfd29a6e85888daef0d0e3e797792223467d44..06e092fb9fc5269b60fd6f5d69a315002d933be9 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -1450,6 +1450,10 @@ example: ``noredzone`` This attribute indicates that the code generator should not use a red zone, even if the target-specific ABI normally permits it.
+``indirect-tls-seg-refs`` + This attribute indicates that the code generator should not use + direct TLS access through segment registers, even if the + target-specific ABI normally permits it. ``noreturn`` This function attribute indicates that the function never returns normally. This produces undefined behavior at runtime if the @@ -2922,7 +2926,7 @@ Simple Constants hexadecimal notation (see below). The assembler requires the exact decimal value of a floating-point constant. For example, the assembler accepts 1.25 but rejects 1.3 because 1.3 is a repeating - decimal in binary. Floating-point constants must have a + decimal in binary. Floating-point constants must have a :ref:`floating-point ` type. **Null pointer constants** The identifier '``null``' is recognized as a null pointer constant @@ -3327,7 +3331,7 @@ The following is the syntax for constant expressions: value won't fit in the integer type, the result is a :ref:`poison value `. ``uitofp (CST to TYPE)`` - Convert an unsigned integer constant to the corresponding + Convert an unsigned integer constant to the corresponding floating-point constant. TYPE must be a scalar or vector floating-point type. CST must be of scalar or vector integer type. Both CST and TYPE must be scalars, or vectors of the same number of elements. @@ -5430,7 +5434,7 @@ Irreducible loop header weights are typically based on profile data. '``invariant.group``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The experimental ``invariant.group`` metadata may be attached to +The experimental ``invariant.group`` metadata may be attached to ``load``/``store`` instructions referencing a single metadata with no entries. 
The existence of the ``invariant.group`` metadata on the instruction tells the optimizer that every ``load`` and ``store`` to the same pointer operand @@ -6871,7 +6875,7 @@ Arguments: """""""""" The two arguments to the '``fadd``' instruction must be -:ref:`floating-point ` or :ref:`vector ` of +:ref:`floating-point ` or :ref:`vector ` of floating-point values. Both arguments must have identical types. Semantics: @@ -6879,7 +6883,7 @@ Semantics: The value produced is the floating-point sum of the two operands. This instruction is assumed to execute in the default :ref:`floating-point -environment `. +environment `. This instruction can also take any number of :ref:`fast-math flags `, which are optimization hints to enable otherwise unsafe floating-point optimizations: @@ -6968,7 +6972,7 @@ Arguments: """""""""" The two arguments to the '``fsub``' instruction must be -:ref:`floating-point ` or :ref:`vector ` of +:ref:`floating-point ` or :ref:`vector ` of floating-point values. Both arguments must have identical types. Semantics: @@ -6976,7 +6980,7 @@ Semantics: The value produced is the floating-point difference of the two operands. This instruction is assumed to execute in the default :ref:`floating-point -environment `. +environment `. This instruction can also take any number of :ref:`fast-math flags `, which are optimization hints to enable otherwise unsafe floating-point optimizations: @@ -7063,7 +7067,7 @@ Arguments: """""""""" The two arguments to the '``fmul``' instruction must be -:ref:`floating-point ` or :ref:`vector ` of +:ref:`floating-point ` or :ref:`vector ` of floating-point values. Both arguments must have identical types. Semantics: @@ -7071,7 +7075,7 @@ Semantics: The value produced is the floating-point product of the two operands. This instruction is assumed to execute in the default :ref:`floating-point -environment `. +environment `. 
This instruction can also take any number of :ref:`fast-math flags `, which are optimization hints to enable otherwise unsafe floating-point optimizations: @@ -7197,7 +7201,7 @@ Arguments: """""""""" The two arguments to the '``fdiv``' instruction must be -:ref:`floating-point ` or :ref:`vector ` of +:ref:`floating-point ` or :ref:`vector ` of floating-point values. Both arguments must have identical types. Semantics: @@ -7205,7 +7209,7 @@ Semantics: The value produced is the floating-point quotient of the two operands. This instruction is assumed to execute in the default :ref:`floating-point -environment `. +environment `. This instruction can also take any number of :ref:`fast-math flags `, which are optimization hints to enable otherwise unsafe floating-point optimizations: @@ -7340,7 +7344,7 @@ Arguments: """""""""" The two arguments to the '``frem``' instruction must be -:ref:`floating-point ` or :ref:`vector ` of +:ref:`floating-point ` or :ref:`vector ` of floating-point values. Both arguments must have identical types. Semantics: @@ -7348,10 +7352,10 @@ Semantics: The value produced is the floating-point remainder of the two operands. This is the same output as a libm '``fmod``' function, but without any -possibility of setting ``errno``. The remainder has the same sign as the +possibility of setting ``errno``. The remainder has the same sign as the dividend. This instruction is assumed to execute in the default :ref:`floating-point -environment `. +environment `. This instruction can also take any number of :ref:`fast-math flags `, which are optimization hints to enable otherwise unsafe floating-point optimizations: @@ -8805,7 +8809,7 @@ Semantics: The '``fptrunc``' instruction casts a ``value`` from a larger :ref:`floating-point ` type to a smaller :ref:`floating-point -` type. +` type. This instruction is assumed to execute in the default :ref:`floating-point environment `. 
@@ -10324,7 +10328,28 @@ Note that calling this intrinsic does not prevent function inlining or other aggressive transformations, so the value returned may not be that of the obvious source-language caller. -This intrinsic is only implemented for x86. +This intrinsic is only implemented for x86 and aarch64. + +'``llvm.sponentry``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i8* @llvm.sponentry() + +Overview: +""""""""" + +The '``llvm.sponentry``' intrinsic returns the stack pointer value at +the entry of the current function calling this intrinsic. + +Semantics: +"""""""""" + +Note this intrinsic is only verified on AArch64. '``llvm.frameaddress``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -11560,6 +11585,82 @@ NaN, the intrinsic lowering is responsible for quieting the inputs to correctly return the non-NaN input (e.g. by using the equivalent of ``llvm.canonicalize``). +'``llvm.minimum.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.minimum`` on any +floating-point or vector of floating-point type. Not all targets support +all types however. + +:: + + declare float @llvm.minimum.f32(float %Val0, float %Val1) + declare double @llvm.minimum.f64(double %Val0, double %Val1) + declare x86_fp80 @llvm.minimum.f80(x86_fp80 %Val0, x86_fp80 %Val1) + declare fp128 @llvm.minimum.f128(fp128 %Val0, fp128 %Val1) + declare ppc_fp128 @llvm.minimum.ppcf128(ppc_fp128 %Val0, ppc_fp128 %Val1) + +Overview: +""""""""" + +The '``llvm.minimum.*``' intrinsics return the minimum of the two +arguments, propagating NaNs and treating -0.0 as less than +0.0. + + +Arguments: +"""""""""" + +The arguments and return value are floating-point numbers of the same +type. + +Semantics: +"""""""""" +If either operand is a NaN, returns NaN. Otherwise returns the lesser +of the two arguments. -0.0 is considered to be less than +0.0 for this +intrinsic. 
Note that these are the semantics specified in the draft of +IEEE 754-2018. + +'``llvm.maximum.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.maximum`` on any +floating-point or vector of floating-point type. Not all targets support +all types however. + +:: + + declare float @llvm.maximum.f32(float %Val0, float %Val1) + declare double @llvm.maximum.f64(double %Val0, double %Val1) + declare x86_fp80 @llvm.maximum.f80(x86_fp80 %Val0, x86_fp80 %Val1) + declare fp128 @llvm.maximum.f128(fp128 %Val0, fp128 %Val1) + declare ppc_fp128 @llvm.maximum.ppcf128(ppc_fp128 %Val0, ppc_fp128 %Val1) + +Overview: +""""""""" + +The '``llvm.maximum.*``' intrinsics return the maximum of the two +arguments, propagating NaNs and treating -0.0 as less than +0.0. + + +Arguments: +"""""""""" + +The arguments and return value are floating-point numbers of the same +type. + +Semantics: +"""""""""" +If either operand is a NaN, returns NaN. Otherwise returns the greater +of the two arguments. -0.0 is considered to be less than +0.0 for this +intrinsic. Note that these are the semantics specified in the draft of +IEEE 754-2018. + '``llvm.copysign.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -12035,11 +12136,11 @@ Overview: The '``llvm.fshl``' family of intrinsic functions performs a funnel shift left: the first two values are concatenated as { %a : %b } (%a is the most significant -bits of the wide value), the combined value is shifted left, and the most -significant bits are extracted to produce a result that is the same size as the -original arguments. If the first 2 arguments are identical, this is equivalent -to a rotate left operation. For vector types, the operation occurs for each -element of the vector. 
The shift argument is treated as an unsigned amount +bits of the wide value), the combined value is shifted left, and the most +significant bits are extracted to produce a result that is the same size as the +original arguments. If the first 2 arguments are identical, this is equivalent +to a rotate left operation. For vector types, the operation occurs for each +element of the vector. The shift argument is treated as an unsigned amount modulo the element size of the arguments. Arguments: @@ -12081,11 +12182,11 @@ Overview: The '``llvm.fshr``' family of intrinsic functions performs a funnel shift right: the first two values are concatenated as { %a : %b } (%a is the most significant -bits of the wide value), the combined value is shifted right, and the least -significant bits are extracted to produce a result that is the same size as the -original arguments. If the first 2 arguments are identical, this is equivalent -to a rotate right operation. For vector types, the operation occurs for each -element of the vector. The shift argument is treated as an unsigned amount +bits of the wide value), the combined value is shifted right, and the least +significant bits are extracted to produce a result that is the same size as the +original arguments. If the first 2 arguments are identical, this is equivalent +to a rotate right operation. For vector types, the operation occurs for each +element of the vector. The shift argument is treated as an unsigned amount modulo the element size of the arguments. Arguments: @@ -13366,7 +13467,7 @@ The '``llvm.masked.expandload``' intrinsic is designed for reading multiple scal %Tmp = call <8 x double> @llvm.masked.expandload.v8f64(double* %Bptr, <8 x i1> %Mask, <8 x double> undef) ; Store the result in A call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %Tmp, <8 x double>* %Aptr, i32 8, <8 x i1> %Mask) - + ; %Bptr should be increased on each iteration according to the number of '1' elements in the Mask. 
%MaskI = bitcast <8 x i1> %Mask to i8 %MaskIPopcnt = call i8 @llvm.ctpop.i8(i8 %MaskI) @@ -13423,7 +13524,7 @@ The '``llvm.masked.compressstore``' intrinsic is designed for compressing data i %Tmp = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %Aptr, i32 8, <8 x i1> %Mask, <8 x double> undef) ; Store all selected elements consecutively in array B call @llvm.masked.compressstore.v8f64(<8 x double> %Tmp, double* %Bptr, <8 x i1> %Mask) - + ; %Bptr should be increased on each iteration according to the number of '1' elements in the Mask. %MaskI = bitcast <8 x i1> %Mask to i8 %MaskIPopcnt = call i8 @llvm.ctpop.i8(i8 %MaskI) @@ -13915,7 +14016,7 @@ value operands and has the same type as the operands. The remainder has the same sign as the dividend. '``llvm.experimental.constrained.fma``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" @@ -14056,7 +14157,7 @@ Overview: The '``llvm.experimental.constrained.powi``' intrinsic returns the first operand raised to the (positive or negative) power specified by the second operand. The -order of evaluation of multiplications is not defined. When a vector of +order of evaluation of multiplications is not defined. When a vector of floating-point type is used, the second argument remains a scalar integer value. @@ -14382,7 +14483,7 @@ Overview: """"""""" The '``llvm.experimental.constrained.nearbyint``' intrinsic returns the first -operand rounded to the nearest integer. It will not raise an inexact +operand rounded to the nearest integer. It will not raise an inexact floating-point exception if the operand is not an integer. @@ -14405,6 +14506,225 @@ mode is determined by the runtime floating-point environment. The rounding mode argument is only intended as information to the compiler. 
+'``llvm.experimental.constrained.maxnum``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.maxnum( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.maxnum``' intrinsic returns the maximum +of the two arguments. + +Arguments: +"""""""""" + +The first two arguments and the return value are floating-point numbers +of the same type. + +The third and fourth arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +This function follows the IEEE-754 semantics for maxNum. The rounding mode is +described, not determined, by the rounding mode argument. The actual rounding +mode is determined by the runtime floating-point environment. The rounding +mode argument is only intended as information to the compiler. + + +'``llvm.experimental.constrained.minnum``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.minnum( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.minnum``' intrinsic returns the minimum +of the two arguments. + +Arguments: +"""""""""" + +The first two arguments and the return value are floating-point numbers +of the same type. + +The third and fourth arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +This function follows the IEEE-754 semantics for minNum. The rounding mode is +described, not determined, by the rounding mode argument. The actual rounding +mode is determined by the runtime floating-point environment. The rounding +mode argument is only intended as information to the compiler.
+ + +'``llvm.experimental.constrained.ceil``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.ceil( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.ceil``' intrinsic returns the ceiling of the +first operand. + +Arguments: +"""""""""" + +The first argument and the return value are floating-point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. The rounding mode is currently unused for this +intrinsic. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``ceil`` functions +would and handles error conditions in the same way. + + +'``llvm.experimental.constrained.floor``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.floor( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.floor``' intrinsic returns the floor of the +first operand. + +Arguments: +"""""""""" + +The first argument and the return value are floating-point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. The rounding mode is currently unused for this +intrinsic. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``floor`` functions +would and handles error conditions in the same way. + + +'``llvm.experimental.constrained.round``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.round( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.round``' intrinsic returns the first +operand rounded to the nearest integer. 
+ +Arguments: +"""""""""" + +The first argument and the return value are floating-point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. The rounding mode is currently unused for this +intrinsic. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``round`` functions +would and handles error conditions in the same way. + + +'``llvm.experimental.constrained.trunc``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.trunc( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.trunc``' intrinsic returns the first +operand rounded to the nearest integer not larger in magnitude than the +operand. + +Arguments: +"""""""""" + +The first argument and the return value are floating-point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. The rounding mode is currently unused for this +intrinsic. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``trunc`` functions +would and handles error conditions in the same way. + + General Intrinsics ------------------ @@ -15097,6 +15417,51 @@ Semantics: This intrinsic actually does nothing, but optimizers must assume that it has externally observable side effects. +'``llvm.is.constant.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use llvm.is.constant with any argument type. + +:: + + declare i1 @llvm.is.constant.i32(i32 %operand) nounwind readnone + declare i1 @llvm.is.constant.f32(float %operand) nounwind readnone + declare i1 @llvm.is.constant.TYPENAME(TYPE %operand) nounwind readnone + +Overview: +""""""""" + +The '``llvm.is.constant``' intrinsic will return true if the argument +is known to be a manifest compile-time constant.
It is guaranteed to +fold to either true or false before generating machine code. + +Semantics: +"""""""""" + +This intrinsic generates no code. If its argument is known to be a +manifest compile-time constant value, then the intrinsic will be +converted to a constant true value. Otherwise, it will be converted to +a constant false value. + +In particular, note that if the argument is a constant expression +which refers to a global (the address of which _is_ a constant, but +not manifest during the compile), then the intrinsic evaluates to +false. + +The result also intentionally depends on the result of optimization +passes -- e.g., the result can change depending on whether a +function gets inlined or not. A function's parameters are +obviously not constant. However, a call like +``llvm.is.constant.i32(i32 %param)`` *can* return true after the +function is inlined, if the value passed to the function parameter was +a constant. + +On the other hand, if constant folding is not run, it will never +evaluate to true, even in simple cases. + Stack Map Intrinsics -------------------- diff --git a/docs/Phabricator.rst b/docs/Phabricator.rst index 53cb3b5980a98a5373290366d8ff52727814f9d8..640e1611da6c442d395dc9de9304ab6c85c1460a 100644 --- a/docs/Phabricator.rst +++ b/docs/Phabricator.rst @@ -94,6 +94,12 @@ them to participate. Many people will see the email notification on cfe-commits or llvm-commits, and if the subject line suggests the patch is something they should look at, they will. + +.. 
_finding-potential-reviewers: + +Finding potential reviewers +--------------------------- + Here are a couple of ways to pick the initial reviewer(s): * Use ``svn blame`` and the commit log to find names of people who have diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst index 64b7de5be157573a1a03a4a4a01a03d7c52984b3..88c56700eb36967cb821b950c1cc7ba855ae03e1 100644 --- a/docs/ProgrammersManual.rst +++ b/docs/ProgrammersManual.rst @@ -3736,13 +3736,6 @@ Important Subclasses of the ``Instruction`` class `ICmpInst `_ (integer opreands), and `FCmpInst `_ (floating point operands). -.. _TerminatorInst: - -* ``TerminatorInst`` - - This subclass is the parent of all terminator instructions (those which can - terminate a block). - .. _m_Instruction: Important Public Members of the ``Instruction`` class @@ -4068,7 +4061,7 @@ This class represents a single entry single exit section of the code, commonly known as a basic block by the compiler community. The ``BasicBlock`` class maintains a list of Instruction_\ s, which form the body of the block. Matching the language definition, the last element of this list of instructions is always -a terminator instruction (a subclass of the TerminatorInst_ class). +a terminator instruction. In addition to tracking the list of instructions that make up the block, the ``BasicBlock`` class also keeps track of the :ref:`Function ` that @@ -4119,7 +4112,7 @@ Important Public Members of the ``BasicBlock`` class Returns a pointer to :ref:`Function ` the block is embedded into, or a null pointer if it is homeless. -* ``TerminatorInst *getTerminator()`` +* ``Instruction *getTerminator()`` Returns a pointer to the terminator instruction that appears at the end of the ``BasicBlock``. 
If there is no terminator instruction, or if the last diff --git a/docs/Proposals/TestSuite.rst b/docs/Proposals/TestSuite.rst new file mode 100644 index 0000000000000000000000000000000000000000..8c7531783d44bfe44a8d619d888701a7536e8e65 --- /dev/null +++ b/docs/Proposals/TestSuite.rst @@ -0,0 +1,317 @@ +===================== +Test-Suite Extensions +===================== + +.. contents:: + :depth: 1 + :local: + +Abstract +======== + +These are ideas for additional programs, benchmarks, applications and +algorithms that could be added to the LLVM Test-Suite. +The test-suite could be much larger than it is now, which would help us +detect compiler errors (crashes, miscompiles) during development. + +Most probably, the reason why the programs below have not been added to +the test-suite yet is that nobody has found time to do it. But there +might be other issues as well, such as + + * Licensing (Support can still be added as external module, + like for the SPEC benchmarks) + + * Language (in particular, there is no official LLVM frontend + for FORTRAN yet) + + * Parallelism (currently, all programs in test-suite use + one thread only) + +Benchmarks +========== + +SPEC CPU 2017 +------------- +https://www.spec.org/cpu2017/ + +The following have not been included yet because they contain Fortran +code. + +In case of cactuBSSN only a small portion is Fortran. The host's +Fortran compiler could be used for these parts. + +Note that CMake's Ninja generator has difficulties with Fortran. See the +`CMake documentation `_ +for details.
+ + * 503.bwaves_r/603.bwaves_s + * 507.cactuBSSN_r + * 521.wrf_r/621.wrf_s + * 527.cam4_r/627.cam4_s + * 628.pop2_s + * 548.exchange2_r/648.exchange2_s + * 549.fotonik3d_r/649.fotonik3d_s + * 554.roms_r/654.roms_s + +SPEC OMP2012 +------------ +https://www.spec.org/omp2012/ + + * 350.md + * 351.bwaves + * 352.nab + * 357.bt331 + * 358.botsalgn + * 359.botsspar + * 360.ilbdc + * 362.fma3d + * 363.swim + * 367.imagick + * 370.mgrid331 + * 371.applu331 + * 372.smithwa + * 376.kdtree + +OpenCV +------ +https://opencv.org/ + +OpenMP 4.x SIMD Benchmarks +-------------------------- +https://github.com/flwende/simd_benchmarks + +PWM-benchmarking +---------------- +https://github.com/tbepler/PWM-benchmarking + +SLAMBench +--------- +https://github.com/pamela-project/slambench + +FireHose +-------- +http://firehose.sandia.gov/ + +A Benchmark for the C/C++ Standard Library +------------------------------------------ +https://github.com/hiraditya/std-benchmark + +OpenBenchmarking.org CPU / Processor Suite +------------------------------------------ +https://openbenchmarking.org/suite/pts/cpu + +This is a subset of the +`Phoronix Test Suite `_ +and is itself a collection of benchmark suites + +Parboil Benchmarks +------------------ +http://impact.crhc.illinois.edu/parboil/parboil.aspx + +MachSuite +--------- +https://breagen.github.io/MachSuite/ + +Rodinia +------- +http://lava.cs.virginia.edu/Rodinia/download_links.htm + +Rodinia has already been partially included in +MultiSource/Benchmarks/Rodinia. 
Benchmarks still missing are: + + * streamcluster + * particlefilter + * nw + * nn + * myocyte + * mummergpu + * lud + * leukocyte + * lavaMD + * kmeans + * hotspot3D + * heartwall + * cfd + * bfs + * b+tree + +vecmathlib test harness +------------------------ +https://bitbucket.org/eschnett/vecmathlib/wiki/Home + +PARSEC +------ +http://parsec.cs.princeton.edu/ + +Graph500 reference implementations +---------------------------------- +https://github.com/graph500/graph500/tree/v2-spec + +NAS Parallel Benchmarks +----------------------- +https://www.nas.nasa.gov/publications/npb.html + +The official benchmark is written in Fortran, but an unofficial +C-translation is available as well: +https://github.com/benchmark-subsetting/NPB3.0-omp-C + +DARPA HPCS SSCA#2 C/OpenMP reference implementation +--------------------------------------------------- +http://www.highproductivity.org/SSCABmks.htm + +This web site does not exist any more, but there seems to be a copy of +some of the benchmarks +https://github.com/gtcasl/hpc-benchmarks/tree/master/SSCA2v2.2 + +Kokkos +------ +https://github.com/kokkos/kokkos-kernels/tree/master/perf_test +https://github.com/kokkos/kokkos/tree/master/benchmarks + +PolyMage +-------- +https://github.com/bondhugula/polymage-benchmarks + +PolyBench +--------- +https://sourceforge.net/projects/polybench/ + +A modified version of Polybench 3.2 is already present in +SingleSource/Benchmarks/Polybench. A newer version 4.2.1 is available. + +High Performance Geometric Multigrid +------------------------------------ +https://crd.lbl.gov/departments/computer-science/PAR/research/hpgmg/ + +RAJA Performance Suite +---------------------- +https://github.com/LLNL/RAJAPerf + +CORAL-2 Benchmarks +------------------ +https://asc.llnl.gov/coral-2-benchmarks/ + +Many of its programs have already been integrated in +MultiSource/Benchmarks/DOE-ProxyApps-C and +MultiSource/Benchmarks/DOE-ProxyApps-C++.
+ + * Nekbone + * QMCPack + * LAMMPS + * Kripke + * Quicksilver + * PENNANT + * Big Data Analytic Suite + * Deep Learning Suite + * Stream + * Stride + * ML/DL micro-benchmark + * Pynamic + * ACME + * VPIC + * Laghos + * Parallel Integer Sort + * Havoq + +NWChem +------ +http://www.nwchem-sw.org/index.php/Benchmarks + +TVM +---- +https://github.com/dmlc/tvm/tree/master/apps/benchmark + +HydroBench +---------- +https://github.com/HydroBench/Hydro + +ParRes +------ +https://github.com/ParRes/Kernels/tree/master/Cxx11 + +Applications/Libraries +====================== + +GnuPG +----- +https://gnupg.org/ + +Blitz++ +------- +https://sourceforge.net/projects/blitz/ + +FFmpeg +------ +https://ffmpeg.org/ + +FreePOOMA +--------- +http://www.nongnu.org/freepooma/ + +FTensors +-------- +http://www.wlandry.net/Projects/FTensor + +rawspeed +-------- +https://github.com/darktable-org/rawspeed + +Its test dataset is 756 MB in size, which is too large to be included +into the test-suite repository. + +Generic Algorithms +================== + +Image processing +---------------- + +Resampling +`````````` + + * Bilinear + * Bicubic + * Lanczos + +Dither +`````` + + * Threshold + * Random + * Halftone + * Bayer + * Floyd-Steinberg + * Jarvis + * Stucki + * Burkes + * Sierra + * Atkinson + * Gradient-based + +Feature detection +````````````````` + + * Harris + * Histogram of Oriented Gradients + +Color conversion +```````````````` + + * RGB to grayscale + * HSL to RGB + +Graph +----- + +Search Algorithms +````````````````` + + * Breadth-First-Search + * Depth-First-Search + * Dijkstra's algorithm + * A-Star + +Spanning Tree +````````````` + + * Kruskal's algorithm + * Prim's algorithm diff --git a/docs/index.rst b/docs/index.rst index 7edfdd241918c501279bc923e4690e999e89181e..df70de095bd94761c7d8c01ce6d18dfe609765b4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -68,6 +68,7 @@ representation. 
CMakePrimer AdvancedBuilds HowToBuildOnARM + HowToBuildWithPGO HowToCrossCompileBuiltinsOnArm HowToCrossCompileLLVM CommandGuide/index @@ -107,6 +108,9 @@ representation. :doc:`HowToBuildOnARM` Notes on building and testing LLVM/Clang on ARM. +:doc:`HowToBuildWithPGO` + Notes on building LLVM/Clang with PGO. + :doc:`HowToCrossCompileBuiltinsOnArm` Notes on cross-building and testing the compiler-rt builtins for Arm. @@ -450,6 +454,7 @@ Information about LLVM's development process. Packaging ReleaseProcess Phabricator + BugLifeCycle :doc:`Contributing` An overview on how to contribute to LLVM. @@ -480,6 +485,9 @@ Information about LLVM's development process. Describes how to use the Phabricator code review tool hosted on http://reviews.llvm.org/ and its command line interface, Arcanist. +:doc:`BugLifeCycle` + Describes how bugs are reported, triaged and closed. + Community ========= @@ -559,6 +567,7 @@ can be better. CodeOfConduct Proposals/GitHubMove + Proposals/TestSuite Proposals/VectorizationPlan :doc:`CodeOfConduct` @@ -568,6 +577,9 @@ can be better. :doc:`Proposals/GitHubMove` Proposal to move from SVN/Git to GitHub. +:doc:`Proposals/TestSuite` + Proposals for additional benchmarks/programs for llvm's test-suite. + :doc:`Proposals/VectorizationPlan` Proposal to model the process and upgrade the infrastructure of LLVM's Loop Vectorizer. diff --git a/docs/tutorial/BuildingAJIT1.rst b/docs/tutorial/BuildingAJIT1.rst index 2b83df42fc247028cbe692746d46b94446f3f452..fcb755bd286f18bc8e5d3d30c2f683a21ad3f8ea 100644 --- a/docs/tutorial/BuildingAJIT1.rst +++ b/docs/tutorial/BuildingAJIT1.rst @@ -8,18 +8,19 @@ Building a JIT: Starting out with KaleidoscopeJIT Chapter 1 Introduction ====================== -**Warning: This text is currently out of date due to ORC API updates.** +**Warning: This tutorial is currently being updated to account for ORC API +changes. Only Chapters 1 and 2 are up-to-date.** -**The example code has been updated and can be used. 
The text will be updated -once the API churn dies down.** +**Example code from Chapters 3 to 5 will compile and run, but has not been +updated** Welcome to Chapter 1 of the "Building an ORC-based JIT in LLVM" tutorial. This tutorial runs through the implementation of a JIT compiler using LLVM's On-Request-Compilation (ORC) APIs. It begins with a simplified version of the KaleidoscopeJIT class used in the `Implementing a language with LLVM `_ tutorials and then -introduces new features like optimization, lazy compilation and remote -execution. +introduces new features like concurrent compilation, optimization, lazy +compilation and remote execution. The goal of this tutorial is to introduce you to LLVM's ORC JIT APIs, show how these APIs interact with other parts of LLVM, and to teach you how to recombine @@ -45,11 +46,9 @@ The structure of the tutorial is: - `Chapter #5 `_: Add process isolation by JITing code into a remote process with reduced privileges using the JIT Remote APIs. -To provide input for our JIT we will use the Kaleidoscope REPL from -`Chapter 7 `_ of the "Implementing a language in LLVM tutorial", -with one minor modification: We will remove the FunctionPassManager from the -code for that chapter and replace it with optimization support in our JIT class -in Chapter #2. +To provide input for our JIT we will use a lightly modified version of the +Kaleidoscope REPL from `Chapter 7 `_ of the "Implementing a +language in LLVM tutorial". Finally, a word on API generations: ORC is the 3rd generation of LLVM JIT API. It was preceded by MCJIT, and before that by the (now deleted) legacy JIT. @@ -63,32 +62,29 @@ JIT API Basics The purpose of a JIT compiler is to compile code "on-the-fly" as it is needed, rather than compiling whole programs to disk ahead of time as a traditional -compiler does. To support that aim our initial, bare-bones JIT API will be: +compiler does. To support that aim our initial, bare-bones JIT API will have +just two functions: -1. 
Handle addModule(Module &M) -- Make the given IR module available for - execution. -2. JITSymbol findSymbol(const std::string &Name) -- Search for pointers to +1. ``Error addModule(std::unique_ptr M)``: Make the given IR module + available for execution. +2. ``Expected lookup()``: Search for pointers to symbols (functions or variables) that have been added to the JIT. -3. void removeModule(Handle H) -- Remove a module from the JIT, releasing any - memory that had been used for the compiled code. A basic use-case for this API, executing the 'main' function from a module, will look like: .. code-block:: c++ - std::unique_ptr M = buildModule(); JIT J; - Handle H = J.addModule(*M); - int (*Main)(int, char*[]) = (int(*)(int, char*[]))J.getSymbolAddress("main"); + J.addModule(buildModule()); + auto *Main = (int(*)(int, char*[]))J.lookup("main").getAddress(); int Result = Main(); - J.removeModule(H); The APIs that we build in these tutorials will all be variations on this simple -theme. Behind the API we will refine the implementation of the JIT to add -support for optimization and lazy compilation. Eventually we will extend the -API itself to allow higher-level program representations (e.g. ASTs) to be -added to the JIT. +theme. Behind this API we will refine the implementation of the JIT to add +support for concurrent compilation, optimization and lazy compilation. +Eventually we will extend the API itself to allow higher-level program +representations (e.g. ASTs) to be added to the JIT. KaleidoscopeJIT =============== @@ -100,12 +96,10 @@ the REPL code from `Chapter 7 `_ of that tutorial to supply the input for our JIT: Each time the user enters an expression the REPL will add a new IR module containing the code for that expression to the JIT. 
If the expression is a top-level expression like '1+1' or 'sin(x)', the REPL will also -use the findSymbol method of our JIT class find and execute the code for the -expression, and then use the removeModule method to remove the code again -(since there's no way to re-invoke an anonymous expression). In later chapters -of this tutorial we'll modify the REPL to enable new interactions with our JIT -class, but for now we will take this setup for granted and focus our attention on -the implementation of our JIT itself. +use the lookup method of our JIT class to find and execute the code for the +expression. In later chapters of this tutorial we will modify the REPL to enable +new interactions with our JIT class, but for now we will take this setup for +granted and focus our attention on the implementation of our JIT itself. Our KaleidoscopeJIT class is defined in the KaleidoscopeJIT.h header. After the usual include guards and #includes [2]_, we get to the definition of our class: @@ -115,216 +109,155 @@ usual include guards and #includes [2]_, we get to the definition of our class: #ifndef LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H #define LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H - #include "llvm/ADT/STLExtras.h" - #include "llvm/ExecutionEngine/ExecutionEngine.h" + #include "llvm/ADT/StringRef.h" #include "llvm/ExecutionEngine/JITSymbol.h" - #include "llvm/ExecutionEngine/RTDyldMemoryManager.h" - #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" + #include "llvm/ExecutionEngine/Orc/Core.h" + #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" - #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" + #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" + #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/IR/DataLayout.h" - #include "llvm/IR/Mangler.h" - #include
"llvm/Support/DynamicLibrary.h" - #include "llvm/Support/raw_ostream.h" - #include "llvm/Target/TargetMachine.h" - #include + #include "llvm/IR/LLVMContext.h" #include - #include - #include namespace llvm { namespace orc { class KaleidoscopeJIT { private: - std::unique_ptr TM; - const DataLayout DL; + ExecutionSession ES; RTDyldObjectLinkingLayer ObjectLayer; - IRCompileLayer CompileLayer; + IRCompileLayer CompileLayer; + + DataLayout DL; + MangleAndInterner Mangle; + ThreadSafeContext Ctx; public: - using ModuleHandle = decltype(CompileLayer)::ModuleHandleT; - -Our class begins with four members: A TargetMachine, TM, which will be used to -build our LLVM compiler instance; A DataLayout, DL, which will be used for -symbol mangling (more on that later), and two ORC *layers*: an -RTDyldObjectLinkingLayer and a CompileLayer. We'll be talking more about layers -in the next chapter, but for now you can think of them as analogous to LLVM -Passes: they wrap up useful JIT utilities behind an easy to compose interface. -The first layer, ObjectLayer, is the foundation of our JIT: it takes in-memory -object files produced by a compiler and links them on the fly to make them -executable. This JIT-on-top-of-a-linker design was introduced in MCJIT, however -the linker was hidden inside the MCJIT class. In ORC we expose the linker so -that clients can access and configure it directly if they need to. In this -tutorial our ObjectLayer will just be used to support the next layer in our -stack: the CompileLayer, which will be responsible for taking LLVM IR, compiling -it, and passing the resulting in-memory object files down to the object linking -layer below. - -That's it for member variables, after that we have a single typedef: -ModuleHandle. This is the handle type that will be returned from our JIT's -addModule method, and can be passed to the removeModule method to remove a -module. 
The IRCompileLayer class already provides a convenient handle type -(IRCompileLayer::ModuleHandleT), so we just alias our ModuleHandle to this. + KaleidoscopeJIT(JITTargetMachineBuilder JTMB, DataLayout DL) + : ObjectLayer(ES, + []() { return llvm::make_unique(); }), + CompileLayer(ES, ObjectLayer, ConcurrentIRCompiler(std::move(JTMB))), + DL(std::move(DL)), Mangle(ES, this->DL), + Ctx(llvm::make_unique()) { + ES.getMainJITDylib().setGenerator( + cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess(DL))); + } + +Our class begins with six member variables: An ExecutionSession member, ``ES``, +which provides context for our running JIT'd code (including the string pool, +global mutex, and error reporting facilities); An RTDyldObjectLinkingLayer, +``ObjectLayer``, that can be used to add object files to our JIT (though we will +not use it directly); An IRCompileLayer, ``CompileLayer``, that can be used to +add LLVM Modules to our JIT (and which builds on the ObjectLayer); A DataLayout +and MangleAndInterner, ``DL`` and ``Mangle``, that will be used for symbol mangling +(more on that later); and finally an LLVMContext that clients will use when +building IR files for the JIT. + +Next up we have our class constructor, which takes a ``JITTargetMachineBuilder`` +that will be used by our IRCompiler, and a ``DataLayout`` that we will use to +initialize our DL member. The constructor begins by initializing our +ObjectLayer. The ObjectLayer requires a reference to the ExecutionSession, and +a function object that will build a JIT memory manager for each module that is +added (a JIT memory manager manages memory allocations, memory permissions, and +registration of exception handlers for JIT'd code). For this we use a lambda +that returns a SectionMemoryManager, an off-the-shelf utility that provides all +the basic memory management functionality required for this chapter. Next we +initialize our CompileLayer.
The CompileLayer needs three things: (1) A +reference to the ExecutionSession, (2) A reference to our object layer, and (3) +a compiler instance to use to perform the actual compilation from IR to object +files. We use the off-the-shelf ConcurrentIRCompiler utility as our compiler, +which we construct using this constructor's JITTargetMachineBuilder argument. +The ConcurrentIRCompiler utility will use the JITTargetMachineBuilder to build +llvm TargetMachines (which are not thread safe) as needed for compiles. After +this, we initialize our supporting members: ``DL``, ``Mangle`` and ``Ctx`` with +the input DataLayout, the ExecutionSession and DL member, and a new default +constructed LLVMContext respectively. Now that our members have been initialized, +the one thing that remains to do is to tweak the configuration of the +*JITDylib* that we will store our code in. We want to modify this dylib to +contain not only the symbols that we add to it, but also the symbols from our +REPL process as well. We do this by attaching a +``DynamicLibrarySearchGenerator`` instance using the +``DynamicLibrarySearchGenerator::GetForCurrentProcess`` method. + .. code-block:: c++ - KaleidoscopeJIT() - : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), - ObjectLayer([]() { return std::make_shared(); }), - CompileLayer(ObjectLayer, SimpleCompiler(*TM)) { - llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr); - } + static Expected> Create() { + auto JTMB = JITTargetMachineBuilder::detectHost(); - TargetMachine &getTargetMachine() { return *TM; } - -Next up we have our class constructor. We begin by initializing TM using the -EngineBuilder::selectTarget helper method which constructs a TargetMachine for -the current process. Then we use our newly created TargetMachine to initialize -DL, our DataLayout. After that we need to initialize our ObjectLayer.
The -ObjectLayer requires a function object that will build a JIT memory manager for -each module that is added (a JIT memory manager manages memory allocations, -memory permissions, and registration of exception handlers for JIT'd code). For -this we use a lambda that returns a SectionMemoryManager, an off-the-shelf -utility that provides all the basic memory management functionality required for -this chapter. Next we initialize our CompileLayer. The CompileLayer needs two -things: (1) A reference to our object layer, and (2) a compiler instance to use -to perform the actual compilation from IR to object files. We use the -off-the-shelf SimpleCompiler instance for now. Finally, in the body of the -constructor, we call the DynamicLibrary::LoadLibraryPermanently method with a -nullptr argument. Normally the LoadLibraryPermanently method is called with the -path of a dynamic library to load, but when passed a null pointer it will 'load' -the host process itself, making its exported symbols available for execution. + if (!JTMB) + return JTMB.takeError(); -.. code-block:: c++ + auto DL = JTMB->getDefaultDataLayoutForTarget(); + if (!DL) + return DL.takeError(); - ModuleHandle addModule(std::unique_ptr M) { - // Build our symbol resolver: - // Lambda 1: Look back into the JIT itself to find symbols that are part of - // the same "logical dylib". - // Lambda 2: Search for external symbols in the host process. - auto Resolver = createLambdaResolver( - [&](const std::string &Name) { - if (auto Sym = CompileLayer.findSymbol(Name, false)) - return Sym; - return JITSymbol(nullptr); - }, - [](const std::string &Name) { - if (auto SymAddr = - RTDyldMemoryManager::getSymbolAddressInProcess(Name)) - return JITSymbol(SymAddr, JITSymbolFlags::Exported); - return JITSymbol(nullptr); - }); - - // Add the set to the JIT with the resolver we created above and a newly - // created SectionMemoryManager. 
- return cantFail(CompileLayer.addModule(std::move(M), - std::move(Resolver))); + return llvm::make_unique(std::move(*JTMB), std::move(*DL)); } -Now we come to the first of our JIT API methods: addModule. This method is -responsible for adding IR to the JIT and making it available for execution. In -this initial implementation of our JIT we will make our modules "available for -execution" by adding them straight to the CompileLayer, which will immediately -compile them. In later chapters we will teach our JIT to defer compilation -of individual functions until they're actually called. - -To add our module to the CompileLayer we need to supply both the module and a -symbol resolver. The symbol resolver is responsible for supplying the JIT with -an address for each *external symbol* in the module we are adding. External -symbols are any symbol not defined within the module itself, including calls to -functions outside the JIT and calls to functions defined in other modules that -have already been added to the JIT. (It may seem as though modules added to the -JIT should know about one another by default, but since we would still have to -supply a symbol resolver for references to code outside the JIT it turns out to -be easier to re-use this one mechanism for all symbol resolution.) This has the -added benefit that the user has full control over the symbol resolution -process. Should we search for definitions within the JIT first, then fall back -on external definitions? Or should we prefer external definitions where -available and only JIT code if we don't already have an available -implementation? By using a single symbol resolution scheme we are free to choose -whatever makes the most sense for any given use case. - -Building a symbol resolver is made especially easy by the *createLambdaResolver* -function. This function takes two lambdas [3]_ and returns a JITSymbolResolver -instance. 
The first lambda is used as the implementation of the resolver's -findSymbolInLogicalDylib method, which searches for symbol definitions that -should be thought of as being part of the same "logical" dynamic library as this -Module. If you are familiar with static linking: this means that -findSymbolInLogicalDylib should expose symbols with common linkage and hidden -visibility. If all this sounds foreign you can ignore the details and just -remember that this is the first method that the linker will use to try to find a -symbol definition. If the findSymbolInLogicalDylib method returns a null result -then the linker will call the second symbol resolver method, called findSymbol, -which searches for symbols that should be thought of as external to (but -visibile from) the module and its logical dylib. In this tutorial we will adopt -the following simple scheme: All modules added to the JIT will behave as if they -were linked into a single, ever-growing logical dylib. To implement this our -first lambda (the one defining findSymbolInLogicalDylib) will just search for -JIT'd code by calling the CompileLayer's findSymbol method. If we don't find a -symbol in the JIT itself we'll fall back to our second lambda, which implements -findSymbol. This will use the RTDyldMemoryManager::getSymbolAddressInProcess -method to search for the symbol within the program itself. If we can't find a -symbol definition via either of these paths, the JIT will refuse to accept our -module, returning a "symbol not found" error. - -Now that we've built our symbol resolver, we're ready to add our module to the -JIT. We do this by calling the CompileLayer's addModule method. The addModule -method returns an ``Expected``, since in more -advanced JIT configurations it could fail. In our basic configuration we know -that it will always succeed so we use the cantFail utility to assert that no -error occurred, and extract the handle value. 
Since we have already typedef'd -our ModuleHandle type to be the same as the CompileLayer's handle type, we can -return the unwrapped handle directly. + const DataLayout &getDataLayout() const { return DL; } -.. code-block:: c++ + LLVMContext &getContext() { return *Ctx.getContext(); } - JITSymbol findSymbol(const std::string Name) { - std::string MangledName; - raw_string_ostream MangledNameStream(MangledName); - Mangler::getNameWithPrefix(MangledNameStream, Name, DL); - return CompileLayer.findSymbol(MangledNameStream.str(), true); - } +Next we have a named constructor, ``Create``, which will build a KaleidoscopeJIT +instance that is configured to generate code for our host process. It does this +by first generating a JITTargetMachineBuilder instance using that class's +detectHost method and then using that instance to generate a DataLayout for +the target process. Each of these operations can fail, so each returns its +result wrapped in an Expected value [3]_ that we must check for error before +continuing. If both operations succeed we can unwrap their results (using the +dereference operator) and pass them into KaleidoscopeJIT's constructor on the +last line of the function. + +Following the named constructor we have the ``getDataLayout()`` and +``getContext()`` methods. These are used to make data structures created and +managed by the JIT (especially the LLVMContext) available to the REPL code that +will build our IR modules. - JITTargetAddress getSymbolAddress(const std::string Name) { - return cantFail(findSymbol(Name).getAddress()); +.. code-block:: c++ + + void addModule(std::unique_ptr M) { + cantFail(CompileLayer.add(ES.getMainJITDylib(), + ThreadSafeModule(std::move(M), Ctx))); } - void removeModule(ModuleHandle H) { - cantFail(CompileLayer.removeModule(H)); + Expected lookup(StringRef Name) { + return ES.lookup({&ES.getMainJITDylib()}, Mangle(Name.str())); } -Now that we can add code to our JIT, we need a way to find the symbols we've -added to it.
To do that we call the findSymbol method on our CompileLayer, but -with a twist: We have to *mangle* the name of the symbol we're searching for -first. The ORC JIT components use mangled symbols internally the same way a -static compiler and linker would, rather than using plain IR symbol names. This -allows JIT'd code to interoperate easily with precompiled code in the -application or shared libraries. The kind of mangling will depend on the -DataLayout, which in turn depends on the target platform. To allow us to remain -portable and search based on the un-mangled name, we just re-produce this -mangling ourselves. - -Next we have a convenience function, getSymbolAddress, which returns the address -of a given symbol. Like CompileLayer's addModule function, JITSymbol's getAddress -function is allowed to fail [4]_, however we know that it will not in our simple -example, so we wrap it in a call to cantFail. - -We now come to the last method in our JIT API: removeModule. This method is -responsible for destructing the MemoryManager and SymbolResolver that were -added with a given module, freeing any resources they were using in the -process. In our Kaleidoscope demo we rely on this method to remove the module -representing the most recent top-level expression, preventing it from being -treated as a duplicate definition when the next top-level expression is -entered. It is generally good to free any module that you know you won't need -to call further, just to free up the resources dedicated to it. However, you -don't strictly need to do this: All resources will be cleaned up when your -JIT class is destructed, if they haven't been freed before then. Like -``CompileLayer::addModule`` and ``JITSymbol::getAddress``, removeModule may -fail in general but will never fail in our example, so we wrap it in a call to -cantFail. +Now we come to the first of our JIT API methods: addModule. 
This method is +responsible for adding IR to the JIT and making it available for execution. In +this initial implementation of our JIT we will make our modules "available for +execution" by adding them to the CompileLayer, which will in turn store the +Module in the main JITDylib. This process will create new symbol table entries +in the JITDylib for each definition in the module, and will defer compilation of +the module until any of its definitions is looked up. Note that this is not lazy +compilation: just referencing a definition, even if it is never used, will be +enough to trigger compilation. In later chapters we will teach our JIT to defer +compilation of functions until they're actually called. To add our Module we +must first wrap it in a ThreadSafeModule instance, which manages the lifetime of +the Module's LLVMContext (our Ctx member) in a thread-friendly way. In our +example, all modules will share the Ctx member, which will exist for the +duration of the JIT. Once we switch to concurrent compilation in later chapters +we will use a new context per module. + +Our last method is ``lookup``, which allows us to look up addresses for +function and variable definitions added to the JIT based on their symbol names. +As noted above, lookup will implicitly trigger compilation for any symbol +that has not already been compiled. Our lookup method calls through to +``ExecutionSession::lookup``, passing in a list of dylibs to search (in our case +just the main dylib), and the symbol name to search for, with a twist: We have +to *mangle* the name of the symbol we're searching for first. The ORC JIT +components use mangled symbols internally the same way a static compiler and +linker would, rather than using plain IR symbol names. This allows JIT'd code +to interoperate easily with precompiled code in the application or shared +libraries. The kind of mangling will depend on the DataLayout, which in turn +depends on the target platform.
To allow us to remain portable and search based +on the un-mangled name, we just re-produce this mangling ourselves using our +``Mangle`` member function object. This brings us to the end of Chapter 1 of Building a JIT. You now have a basic but fully functioning JIT stack that you can use to take LLVM IR and make it @@ -362,42 +295,29 @@ Here is the code: .. [2] +-----------------------------+-----------------------------------------------+ | File | Reason for inclusion | +=============================+===============================================+ - | STLExtras.h | LLVM utilities that are useful when working | - | | with the STL. | + | JITSymbol.h | Defines the lookup result type | + | | JITEvaluatedSymbol | +-----------------------------+-----------------------------------------------+ - | ExecutionEngine.h | Access to the EngineBuilder::selectTarget | - | | method. | + | CompileUtils.h | Provides the SimpleCompiler class. | +-----------------------------+-----------------------------------------------+ - | | Access to the | - | RTDyldMemoryManager.h | RTDyldMemoryManager::getSymbolAddressInProcess| - | | method. | + | Core.h | Core utilities such as ExecutionSession and | + | | JITDylib. | +-----------------------------+-----------------------------------------------+ - | CompileUtils.h | Provides the SimpleCompiler class. | + | ExecutionUtils.h | Provides the DynamicLibrarySearchGenerator | + | | class. | +-----------------------------+-----------------------------------------------+ - | IRCompileLayer.h | Provides the IRCompileLayer class. | + | IRCompileLayer.h | Provides the IRCompileLayer class. | +-----------------------------+-----------------------------------------------+ - | | Access the createLambdaResolver function, | - | LambdaResolver.h | which provides easy construction of symbol | - | | resolvers. | + | JITTargetMachineBuilder.h | Provides the JITTargetMachineBuilder class. 
| +-----------------------------+-----------------------------------------------+ - | RTDyldObjectLinkingLayer.h | Provides the RTDyldObjectLinkingLayer class. | + | RTDyldObjectLinkingLayer.h | Provides the RTDyldObjectLinkingLayer class. | +-----------------------------+-----------------------------------------------+ - | Mangler.h | Provides the Mangler class for platform | - | | specific name-mangling. | + | SectionMemoryManager.h | Provides the SectionMemoryManager class. | +-----------------------------+-----------------------------------------------+ - | DynamicLibrary.h | Provides the DynamicLibrary class, which | - | | makes symbols in the host process searchable. | + | DataLayout.h | Provides the DataLayout class. | +-----------------------------+-----------------------------------------------+ - | | A fast output stream class. We use the | - | raw_ostream.h | raw_string_ostream subclass for symbol | - | | mangling | + | LLVMContext.h | Provides the LLVMContext class. | +-----------------------------+-----------------------------------------------+ - | TargetMachine.h | LLVM target machine description class. | - +-----------------------------+-----------------------------------------------+ - -.. [3] Actually they don't have to be lambdas, any object with a call operator - will do, including plain old functions or std::functions. -.. [4] ``JITSymbol::getAddress`` will force the JIT to compile the definition of - the symbol if it hasn't already been compiled, and since the compilation - process could fail getAddress must be able to return this failure. +.. 
[3] See the ErrorHandling section in the LLVM Programmer's Manual + (http://llvm.org/docs/ProgrammersManual.html#error-handling) \ No newline at end of file diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h index 0b8bb381d08a76c5167de8a2cb174a56ad32f670..1df5aff086935677b114ae4d2f68198fc325deed 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h @@ -14,24 +14,18 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H #define LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H -#include "llvm/ADT/STLExtras.h" -#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" -#include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" -#include "llvm/ExecutionEngine/Orc/LambdaResolver.h" +#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Mangler.h" -#include "llvm/Support/DynamicLibrary.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" -#include +#include "llvm/IR/LLVMContext.h" #include -#include -#include namespace llvm { namespace orc { @@ -39,59 +33,48 @@ namespace orc { class KaleidoscopeJIT { private: ExecutionSession ES; - std::shared_ptr Resolver; - std::unique_ptr TM; - const DataLayout DL; RTDyldObjectLinkingLayer ObjectLayer; - IRCompileLayer CompileLayer; + IRCompileLayer CompileLayer; + + DataLayout DL; + MangleAndInterner Mangle; + ThreadSafeContext Ctx; public: - 
KaleidoscopeJIT() - : Resolver(createLegacyLookupResolver( - ES, - [this](const std::string &Name) -> JITSymbol { - if (auto Sym = CompileLayer.findSymbol(Name, false)) - return Sym; - else if (auto Err = Sym.takeError()) - return std::move(Err); - if (auto SymAddr = - RTDyldMemoryManager::getSymbolAddressInProcess(Name)) - return JITSymbol(SymAddr, JITSymbolFlags::Exported); - return nullptr; - }, - [](Error Err) { cantFail(std::move(Err), "lookupFlags failed"); })), - TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), - ObjectLayer(ES, - [this](VModuleKey) { - return RTDyldObjectLinkingLayer::Resources{ - std::make_shared(), Resolver}; - }), - CompileLayer(ObjectLayer, SimpleCompiler(*TM)) { - llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr); + KaleidoscopeJIT(JITTargetMachineBuilder JTMB, DataLayout DL) + : ObjectLayer(ES, + []() { return llvm::make_unique(); }), + CompileLayer(ES, ObjectLayer, ConcurrentIRCompiler(std::move(JTMB))), + DL(std::move(DL)), Mangle(ES, this->DL), + Ctx(llvm::make_unique()) { + ES.getMainJITDylib().setGenerator( + cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess(DL))); } - TargetMachine &getTargetMachine() { return *TM; } + static Expected> Create() { + auto JTMB = JITTargetMachineBuilder::detectHost(); - VModuleKey addModule(std::unique_ptr M) { - // Add the module to the JIT with a new VModuleKey. 
- auto K = ES.allocateVModule(); - cantFail(CompileLayer.addModule(K, std::move(M))); - return K; - } + if (!JTMB) + return JTMB.takeError(); - JITSymbol findSymbol(const std::string Name) { - std::string MangledName; - raw_string_ostream MangledNameStream(MangledName); - Mangler::getNameWithPrefix(MangledNameStream, Name, DL); - return CompileLayer.findSymbol(MangledNameStream.str(), true); + auto DL = JTMB->getDefaultDataLayoutForTarget(); + if (!DL) + return DL.takeError(); + + return llvm::make_unique(std::move(*JTMB), std::move(*DL)); } - JITTargetAddress getSymbolAddress(const std::string Name) { - return cantFail(findSymbol(Name).getAddress()); + const DataLayout &getDataLayout() const { return DL; } + + LLVMContext &getContext() { return *Ctx.getContext(); } + + Error addModule(std::unique_ptr M) { + return CompileLayer.add(ES.getMainJITDylib(), + ThreadSafeModule(std::move(M), Ctx)); } - void removeModule(VModuleKey K) { - cantFail(CompileLayer.removeModule(K)); + Expected lookup(StringRef Name) { + return ES.lookup({&ES.getMainJITDylib()}, Mangle(Name.str())); } }; diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp b/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp index 7652e80c69a1c57c14f15c34cf727dee3fec4f90..5a66b367c27368a8d383cb9ad8685e4b4cdccee8 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp @@ -676,10 +676,11 @@ static std::unique_ptr ParseDefinition() { } /// toplevelexpr ::= expression -static std::unique_ptr ParseTopLevelExpr() { +static std::unique_ptr ParseTopLevelExpr(unsigned ExprCount) { if (auto E = ParseExpression()) { // Make an anonymous proto. 
- auto Proto = llvm::make_unique("__anon_expr", + auto Proto = llvm::make_unique(("__anon_expr" + + Twine(ExprCount)).str(), std::vector()); return llvm::make_unique(std::move(Proto), std::move(E)); } @@ -696,12 +697,13 @@ static std::unique_ptr ParseExtern() { // Code Generation //===----------------------------------------------------------------------===// -static LLVMContext TheContext; -static IRBuilder<> Builder(TheContext); +static std::unique_ptr TheJIT; +static LLVMContext *TheContext; +static std::unique_ptr> Builder; static std::unique_ptr TheModule; static std::map NamedValues; -static std::unique_ptr TheJIT; static std::map> FunctionProtos; +static ExitOnError ExitOnErr; Value *LogErrorV(const char *Str) { LogError(Str); @@ -729,11 +731,11 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, const std::string &VarName) { IRBuilder<> TmpB(&TheFunction->getEntryBlock(), TheFunction->getEntryBlock().begin()); - return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), nullptr, VarName); + return TmpB.CreateAlloca(Type::getDoubleTy(*TheContext), nullptr, VarName); } Value *NumberExprAST::codegen() { - return ConstantFP::get(TheContext, APFloat(Val)); + return ConstantFP::get(*TheContext, APFloat(Val)); } Value *VariableExprAST::codegen() { @@ -743,7 +745,7 @@ Value *VariableExprAST::codegen() { return LogErrorV("Unknown variable name"); // Load the value. 
- return Builder.CreateLoad(V, Name.c_str()); + return Builder->CreateLoad(V, Name.c_str()); } Value *UnaryExprAST::codegen() { @@ -755,7 +757,7 @@ Value *UnaryExprAST::codegen() { if (!F) return LogErrorV("Unknown unary operator"); - return Builder.CreateCall(F, OperandV, "unop"); + return Builder->CreateCall(F, OperandV, "unop"); } Value *BinaryExprAST::codegen() { @@ -778,7 +780,7 @@ Value *BinaryExprAST::codegen() { if (!Variable) return LogErrorV("Unknown variable name"); - Builder.CreateStore(Val, Variable); + Builder->CreateStore(Val, Variable); return Val; } @@ -789,15 +791,15 @@ Value *BinaryExprAST::codegen() { switch (Op) { case '+': - return Builder.CreateFAdd(L, R, "addtmp"); + return Builder->CreateFAdd(L, R, "addtmp"); case '-': - return Builder.CreateFSub(L, R, "subtmp"); + return Builder->CreateFSub(L, R, "subtmp"); case '*': - return Builder.CreateFMul(L, R, "multmp"); + return Builder->CreateFMul(L, R, "multmp"); case '<': - L = Builder.CreateFCmpULT(L, R, "cmptmp"); + L = Builder->CreateFCmpULT(L, R, "cmptmp"); // Convert bool 0/1 to double 0.0 or 1.0 - return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp"); + return Builder->CreateUIToFP(L, Type::getDoubleTy(*TheContext), "booltmp"); default: break; } @@ -808,7 +810,7 @@ Value *BinaryExprAST::codegen() { assert(F && "binary operator not found!"); Value *Ops[] = {L, R}; - return Builder.CreateCall(F, Ops, "binop"); + return Builder->CreateCall(F, Ops, "binop"); } Value *CallExprAST::codegen() { @@ -828,7 +830,7 @@ Value *CallExprAST::codegen() { return nullptr; } - return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); + return Builder->CreateCall(CalleeF, ArgsV, "calltmp"); } Value *IfExprAST::codegen() { @@ -837,46 +839,46 @@ Value *IfExprAST::codegen() { return nullptr; // Convert condition to a bool by comparing equal to 0.0. 
- CondV = Builder.CreateFCmpONE( - CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); + CondV = Builder->CreateFCmpONE( + CondV, ConstantFP::get(*TheContext, APFloat(0.0)), "ifcond"); - Function *TheFunction = Builder.GetInsertBlock()->getParent(); + Function *TheFunction = Builder->GetInsertBlock()->getParent(); // Create blocks for the then and else cases. Insert the 'then' block at the // end of the function. - BasicBlock *ThenBB = BasicBlock::Create(TheContext, "then", TheFunction); - BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else"); - BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont"); + BasicBlock *ThenBB = BasicBlock::Create(*TheContext, "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(*TheContext, "else"); + BasicBlock *MergeBB = BasicBlock::Create(*TheContext, "ifcont"); - Builder.CreateCondBr(CondV, ThenBB, ElseBB); + Builder->CreateCondBr(CondV, ThenBB, ElseBB); // Emit then value. - Builder.SetInsertPoint(ThenBB); + Builder->SetInsertPoint(ThenBB); Value *ThenV = Then->codegen(); if (!ThenV) return nullptr; - Builder.CreateBr(MergeBB); + Builder->CreateBr(MergeBB); // Codegen of 'Then' can change the current block, update ThenBB for the PHI. - ThenBB = Builder.GetInsertBlock(); + ThenBB = Builder->GetInsertBlock(); // Emit else block. TheFunction->getBasicBlockList().push_back(ElseBB); - Builder.SetInsertPoint(ElseBB); + Builder->SetInsertPoint(ElseBB); Value *ElseV = Else->codegen(); if (!ElseV) return nullptr; - Builder.CreateBr(MergeBB); + Builder->CreateBr(MergeBB); // Codegen of 'Else' can change the current block, update ElseBB for the PHI. - ElseBB = Builder.GetInsertBlock(); + ElseBB = Builder->GetInsertBlock(); // Emit merge block. 
TheFunction->getBasicBlockList().push_back(MergeBB); - Builder.SetInsertPoint(MergeBB); - PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp"); + Builder->SetInsertPoint(MergeBB); + PHINode *PN = Builder->CreatePHI(Type::getDoubleTy(*TheContext), 2, "iftmp"); PN->addIncoming(ThenV, ThenBB); PN->addIncoming(ElseV, ElseBB); @@ -903,7 +905,7 @@ Value *IfExprAST::codegen() { // br endcond, loop, endloop // outloop: Value *ForExprAST::codegen() { - Function *TheFunction = Builder.GetInsertBlock()->getParent(); + Function *TheFunction = Builder->GetInsertBlock()->getParent(); // Create an alloca for the variable in the entry block. AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); @@ -914,17 +916,17 @@ Value *ForExprAST::codegen() { return nullptr; // Store the value into the alloca. - Builder.CreateStore(StartVal, Alloca); + Builder->CreateStore(StartVal, Alloca); // Make the new basic block for the loop header, inserting after current // block. - BasicBlock *LoopBB = BasicBlock::Create(TheContext, "loop", TheFunction); + BasicBlock *LoopBB = BasicBlock::Create(*TheContext, "loop", TheFunction); // Insert an explicit fall through from the current block to the LoopBB. - Builder.CreateBr(LoopBB); + Builder->CreateBr(LoopBB); // Start insertion in LoopBB. - Builder.SetInsertPoint(LoopBB); + Builder->SetInsertPoint(LoopBB); // Within the loop, the variable is defined equal to the PHI node. If it // shadows an existing variable, we have to restore it, so save it now. @@ -945,7 +947,7 @@ Value *ForExprAST::codegen() { return nullptr; } else { // If not specified, use 1.0. - StepVal = ConstantFP::get(TheContext, APFloat(1.0)); + StepVal = ConstantFP::get(*TheContext, APFloat(1.0)); } // Compute the end condition. @@ -955,23 +957,23 @@ Value *ForExprAST::codegen() { // Reload, increment, and restore the alloca. This handles the case where // the body of the loop mutates the variable. 
- Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str()); - Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); - Builder.CreateStore(NextVar, Alloca); + Value *CurVar = Builder->CreateLoad(Alloca, VarName.c_str()); + Value *NextVar = Builder->CreateFAdd(CurVar, StepVal, "nextvar"); + Builder->CreateStore(NextVar, Alloca); // Convert condition to a bool by comparing equal to 0.0. - EndCond = Builder.CreateFCmpONE( - EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); + EndCond = Builder->CreateFCmpONE( + EndCond, ConstantFP::get(*TheContext, APFloat(0.0)), "loopcond"); // Create the "after loop" block and insert it. BasicBlock *AfterBB = - BasicBlock::Create(TheContext, "afterloop", TheFunction); + BasicBlock::Create(*TheContext, "afterloop", TheFunction); // Insert the conditional branch into the end of LoopEndBB. - Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + Builder->CreateCondBr(EndCond, LoopBB, AfterBB); // Any new code will be inserted in AfterBB. - Builder.SetInsertPoint(AfterBB); + Builder->SetInsertPoint(AfterBB); // Restore the unshadowed variable. if (OldVal) @@ -980,13 +982,13 @@ Value *ForExprAST::codegen() { NamedValues.erase(VarName); // for expr always returns 0.0. - return Constant::getNullValue(Type::getDoubleTy(TheContext)); + return Constant::getNullValue(Type::getDoubleTy(*TheContext)); } Value *VarExprAST::codegen() { std::vector OldBindings; - Function *TheFunction = Builder.GetInsertBlock()->getParent(); + Function *TheFunction = Builder->GetInsertBlock()->getParent(); // Register all variables and emit their initializer. for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { @@ -1004,11 +1006,11 @@ Value *VarExprAST::codegen() { if (!InitVal) return nullptr; } else { // If not specified, use 0.0. 
- InitVal = ConstantFP::get(TheContext, APFloat(0.0)); + InitVal = ConstantFP::get(*TheContext, APFloat(0.0)); } AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); - Builder.CreateStore(InitVal, Alloca); + Builder->CreateStore(InitVal, Alloca); // Remember the old variable binding so that we can restore the binding when // we unrecurse. @@ -1033,9 +1035,9 @@ Value *VarExprAST::codegen() { Function *PrototypeAST::codegen() { // Make the function type: double(double,double) etc. - std::vector Doubles(Args.size(), Type::getDoubleTy(TheContext)); + std::vector Doubles(Args.size(), Type::getDoubleTy(*TheContext)); FunctionType *FT = - FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false); + FunctionType::get(Type::getDoubleTy(*TheContext), Doubles, false); Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); @@ -1062,8 +1064,8 @@ Function *FunctionAST::codegen() { BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); // Create a new basic block to start insertion into. - BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction); - Builder.SetInsertPoint(BB); + BasicBlock *BB = BasicBlock::Create(*TheContext, "entry", TheFunction); + Builder->SetInsertPoint(BB); // Record the function arguments in the NamedValues map. NamedValues.clear(); @@ -1072,7 +1074,7 @@ Function *FunctionAST::codegen() { AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); // Store the initial value into the alloca. - Builder.CreateStore(&Arg, Alloca); + Builder->CreateStore(&Arg, Alloca); // Add arguments to variable symbol table. NamedValues[Arg.getName()] = Alloca; @@ -1080,7 +1082,7 @@ Function *FunctionAST::codegen() { if (Value *RetVal = Body->codegen()) { // Finish off the function. - Builder.CreateRet(RetVal); + Builder->CreateRet(RetVal); // Validate the generated code, checking for consistency. 
verifyFunction(*TheFunction); @@ -1102,8 +1104,11 @@ Function *FunctionAST::codegen() { static void InitializeModule() { // Open a new module. - TheModule = llvm::make_unique("my cool jit", TheContext); - TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); + TheModule = llvm::make_unique("my cool jit", *TheContext); + TheModule->setDataLayout(TheJIT->getDataLayout()); + + // Create a new builder for the module. + Builder = llvm::make_unique>(*TheContext); } static void HandleDefinition() { @@ -1112,7 +1117,7 @@ static void HandleDefinition() { fprintf(stderr, "Read function definition:"); FnIR->print(errs()); fprintf(stderr, "\n"); - TheJIT->addModule(std::move(TheModule)); + ExitOnErr(TheJIT->addModule(std::move(TheModule))); InitializeModule(); } } else { @@ -1136,23 +1141,27 @@ static void HandleExtern() { } static void HandleTopLevelExpression() { + static unsigned ExprCount = 0; + + // Update ExprCount. This number will be added to anonymous expressions to + // prevent them from clashing. + ++ExprCount; + // Evaluate a top-level expression into an anonymous function. - if (auto FnAST = ParseTopLevelExpr()) { + if (auto FnAST = ParseTopLevelExpr(ExprCount)) { if (FnAST->codegen()) { // JIT the module containing the anonymous expression, keeping a handle so // we can free it later. - auto H = TheJIT->addModule(std::move(TheModule)); + ExitOnErr(TheJIT->addModule(std::move(TheModule))); InitializeModule(); - // Get the anonymous expression's address and cast it to the right type, - // double(*)(), so we can call it as a native function. - double (*FP)() = - (double (*)())(intptr_t)TheJIT->getSymbolAddress("__anon_expr"); + // Get the anonymous expression's JITSymbol. 
+ auto Sym = + ExitOnErr(TheJIT->lookup(("__anon_expr" + Twine(ExprCount)).str())); + + auto *FP = (double (*)())(intptr_t)Sym.getAddress(); assert(FP && "Failed to codegen function"); fprintf(stderr, "Evaluated to %f\n", FP()); - - // Delete the anonymous expression module from the JIT. - TheJIT->removeModule(H); } } else { // Skip token for error recovery. @@ -1220,7 +1229,8 @@ int main() { fprintf(stderr, "ready> "); getNextToken(); - TheJIT = llvm::make_unique(); + TheJIT = ExitOnErr(KaleidoscopeJIT::Create()); + TheContext = &TheJIT->getContext(); InitializeModule(); diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h index 9ea84d1a8581bc744069bae9b0308b562afb676e..7c803b138c0666e9406bccad50f6aeb653c07ef1 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h @@ -47,13 +47,13 @@ private: std::shared_ptr Resolver; std::unique_ptr TM; const DataLayout DL; - RTDyldObjectLinkingLayer ObjectLayer; - IRCompileLayer CompileLayer; + LegacyRTDyldObjectLinkingLayer ObjectLayer; + LegacyIRCompileLayer CompileLayer; using OptimizeFunction = std::function(std::unique_ptr)>; - IRTransformLayer OptimizeLayer; + LegacyIRTransformLayer OptimizeLayer; public: KaleidoscopeJIT() @@ -73,7 +73,7 @@ public: TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), ObjectLayer(ES, [this](VModuleKey) { - return RTDyldObjectLinkingLayer::Resources{ + return LegacyRTDyldObjectLinkingLayer::Resources{ std::make_shared(), Resolver}; }), CompileLayer(ObjectLayer, SimpleCompiler(*TM)), diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h index 80c39bd70f72339df76f60c3a7c4f195ed8bd5d5..ce0111d2f6b845f6a0fb1ef382e9c892aa030df7 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h +++ 
b/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h @@ -51,23 +51,23 @@ private: std::map> Resolvers; std::unique_ptr TM; const DataLayout DL; - RTDyldObjectLinkingLayer ObjectLayer; - IRCompileLayer CompileLayer; + LegacyRTDyldObjectLinkingLayer ObjectLayer; + LegacyIRCompileLayer CompileLayer; using OptimizeFunction = std::function(std::unique_ptr)>; - IRTransformLayer OptimizeLayer; + LegacyIRTransformLayer OptimizeLayer; std::unique_ptr CompileCallbackManager; - CompileOnDemandLayer CODLayer; + LegacyCompileOnDemandLayer CODLayer; public: KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), ObjectLayer(ES, [this](VModuleKey K) { - return RTDyldObjectLinkingLayer::Resources{ + return LegacyRTDyldObjectLinkingLayer::Resources{ std::make_shared(), Resolvers[K]}; }), diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h index 04ad86e34bfb10ae62469fa3c62e3cead70a26f9..ffca65fbcd4f52c30f3fa40508eeb9773b961ba9 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h @@ -77,13 +77,13 @@ private: std::shared_ptr Resolver; std::unique_ptr TM; const DataLayout DL; - RTDyldObjectLinkingLayer ObjectLayer; - IRCompileLayer CompileLayer; + LegacyRTDyldObjectLinkingLayer ObjectLayer; + LegacyIRCompileLayer CompileLayer; using OptimizeFunction = std::function(std::unique_ptr)>; - IRTransformLayer OptimizeLayer; + LegacyIRTransformLayer OptimizeLayer; std::unique_ptr CompileCallbackMgr; std::unique_ptr IndirectStubsMgr; @@ -108,7 +108,7 @@ public: TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), ObjectLayer(ES, [this](VModuleKey K) { - return RTDyldObjectLinkingLayer::Resources{ + return LegacyRTDyldObjectLinkingLayer::Resources{ std::make_shared(), Resolver}; }), CompileLayer(ObjectLayer, SimpleCompiler(*TM)), diff --git 
a/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h index 010f54363778b201d6e56b35732801fa6e29d0fd..f1ae5b022895b8c9b2ddbd404072aa4634d5b09b 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h @@ -82,13 +82,13 @@ private: std::shared_ptr Resolver; std::unique_ptr TM; const DataLayout DL; - RTDyldObjectLinkingLayer ObjectLayer; - IRCompileLayer CompileLayer; + LegacyRTDyldObjectLinkingLayer ObjectLayer; + LegacyIRCompileLayer CompileLayer; using OptimizeFunction = std::function(std::unique_ptr)>; - IRTransformLayer OptimizeLayer; + LegacyIRTransformLayer OptimizeLayer; JITCompileCallbackManager *CompileCallbackMgr; std::unique_ptr IndirectStubsMgr; @@ -116,7 +116,7 @@ public: DL(TM->createDataLayout()), ObjectLayer(ES, [this](VModuleKey K) { - return RTDyldObjectLinkingLayer::Resources{ + return LegacyRTDyldObjectLinkingLayer::Resources{ cantFail(this->Remote.createRemoteMemoryManager()), Resolver}; }), diff --git a/examples/Kaleidoscope/include/KaleidoscopeJIT.h b/examples/Kaleidoscope/include/KaleidoscopeJIT.h index 7239aea7ba1bcf6e2424ba267a65abd85998bc72..972773a64f7e4f2280599960a4d8f5346570dfe9 100644 --- a/examples/Kaleidoscope/include/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/include/KaleidoscopeJIT.h @@ -40,8 +40,8 @@ namespace orc { class KaleidoscopeJIT { public: - using ObjLayerT = RTDyldObjectLinkingLayer; - using CompileLayerT = IRCompileLayer; + using ObjLayerT = LegacyRTDyldObjectLinkingLayer; + using CompileLayerT = LegacyIRCompileLayer; KaleidoscopeJIT() : Resolver(createLegacyLookupResolver( diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index f7f22387b53e2f5347950fd6a4c4df421c6a9447..c093c0906ce3b32ddf9f19bb7126dcaaba2050ff 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -929,6 +929,44 @@ void LLVMGetNamedMetadataOperands(LLVMModuleRef M, const char 
*Name, void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char *Name, LLVMValueRef Val); +/** + * Return the directory of the debug location for this value, which must be + * an llvm::Instruction, llvm::GlobalVariable, or llvm::Function. + * + * @see llvm::Instruction::getDebugLoc() + * @see llvm::GlobalVariable::getDebugInfo() + * @see llvm::Function::getSubprogram() + */ +const char *LLVMGetDebugLocDirectory(LLVMValueRef Val, unsigned *Length); + +/** + * Return the filename of the debug location for this value, which must be + * an llvm::Instruction, llvm::GlobalVariable, or llvm::Function. + * + * @see llvm::Instruction::getDebugLoc() + * @see llvm::GlobalVariable::getDebugInfo() + * @see llvm::Function::getSubprogram() + */ +const char *LLVMGetDebugLocFilename(LLVMValueRef Val, unsigned *Length); + +/** + * Return the line number of the debug location for this value, which must be + * an llvm::Instruction, llvm::GlobalVariable, or llvm::Function. + * + * @see llvm::Instruction::getDebugLoc() + * @see llvm::GlobalVariable::getDebugInfo() + * @see llvm::Function::getSubprogram() + */ +unsigned LLVMGetDebugLocLine(LLVMValueRef Val); + +/** + * Return the column number of the debug location for this value, which must be + * an llvm::Instruction. + * + * @see llvm::Instruction::getDebugLoc() + */ +unsigned LLVMGetDebugLocColumn(LLVMValueRef Val); + /** * Add a function to a module under a specified name. 
* @@ -1501,16 +1539,15 @@ LLVMTypeRef LLVMX86MMXType(void); macro(SelectInst) \ macro(ShuffleVectorInst) \ macro(StoreInst) \ - macro(TerminatorInst) \ - macro(BranchInst) \ - macro(IndirectBrInst) \ - macro(InvokeInst) \ - macro(ReturnInst) \ - macro(SwitchInst) \ - macro(UnreachableInst) \ - macro(ResumeInst) \ - macro(CleanupReturnInst) \ - macro(CatchReturnInst) \ + macro(BranchInst) \ + macro(IndirectBrInst) \ + macro(InvokeInst) \ + macro(ReturnInst) \ + macro(SwitchInst) \ + macro(UnreachableInst) \ + macro(ResumeInst) \ + macro(CleanupReturnInst) \ + macro(CatchReturnInst) \ macro(FuncletPadInst) \ macro(CatchPadInst) \ macro(CleanupPadInst) \ @@ -2344,6 +2381,54 @@ void LLVMSetPersonalityFn(LLVMValueRef Fn, LLVMValueRef PersonalityFn); */ unsigned LLVMGetIntrinsicID(LLVMValueRef Fn); +/** + * Create or insert the declaration of an intrinsic. For overloaded intrinsics, + * parameter types must be provided to uniquely identify an overload. + * + * @see llvm::Intrinsic::getDeclaration() + */ +LLVMValueRef LLVMGetIntrinsicDeclaration(LLVMModuleRef Mod, + unsigned ID, + LLVMTypeRef *ParamTypes, + size_t ParamCount); + +/** + * Retrieves the type of an intrinsic. For overloaded intrinsics, parameter + * types must be provided to uniquely identify an overload. + * + * @see llvm::Intrinsic::getType() + */ +LLVMTypeRef LLVMIntrinsicGetType(LLVMContextRef Ctx, unsigned ID, + LLVMTypeRef *ParamTypes, size_t ParamCount); + +/** + * Retrieves the name of an intrinsic. + * + * @see llvm::Intrinsic::getName() + */ +const char *LLVMIntrinsicGetName(unsigned ID, size_t *NameLength); + +/** + * Copies the name of an overloaded intrinsic identified by a given list of + * parameter types. + * + * Unlike LLVMIntrinsicGetName, the caller is responsible for freeing the + * returned string. 
+ * + * @see llvm::Intrinsic::getName() + */ +const char *LLVMIntrinsicCopyOverloadedName(unsigned ID, + LLVMTypeRef *ParamTypes, + size_t ParamCount, + size_t *NameLength); + +/** + * Obtain if the intrinsic identified by the given ID is overloaded. + * + * @see llvm::Intrinsic::isOverloaded() + */ +LLVMBool LLVMIntrinsicIsOverloaded(unsigned ID); + /** * Obtain the calling function of a function. * @@ -2641,7 +2726,7 @@ LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB); * If the basic block does not have a terminator (it is not well-formed * if it doesn't), then NULL is returned. * - * The returned LLVMValueRef corresponds to a llvm::TerminatorInst. + * The returned LLVMValueRef corresponds to an llvm::Instruction. * * @see llvm::BasicBlock::getTerminator() */ @@ -2913,6 +2998,15 @@ LLVMRealPredicate LLVMGetFCmpPredicate(LLVMValueRef Inst); */ LLVMValueRef LLVMInstructionClone(LLVMValueRef Inst); +/** + * Determine whether an instruction is a terminator. This routine is named to + * be compatible with historical functions that did this by querying the + * underlying C++ type. + * + * @see llvm::Instruction::isTerminator() + */ +LLVMValueRef LLVMIsATerminatorInst(LLVMValueRef Inst); + /** * @defgroup LLVMCCoreValueInstructionCall Call Sites and Invocations * @@ -3053,8 +3147,8 @@ void LLVMSetUnwindDest(LLVMValueRef InvokeInst, LLVMBasicBlockRef B); /** * @defgroup LLVMCCoreValueInstructionTerminator Terminators * - * Functions in this group only apply to instructions that map to - * llvm::TerminatorInst instances. + * Functions in this group only apply to instructions for which + * LLVMIsATerminatorInst returns true. * * @{ */ @@ -3062,21 +3156,21 @@ void LLVMSetUnwindDest(LLVMValueRef InvokeInst, LLVMBasicBlockRef B); /** * Return the number of successors that this terminator has. 
* - * @see llvm::TerminatorInst::getNumSuccessors + * @see llvm::Instruction::getNumSuccessors */ unsigned LLVMGetNumSuccessors(LLVMValueRef Term); /** * Return the specified successor. * - * @see llvm::TerminatorInst::getSuccessor + * @see llvm::Instruction::getSuccessor */ LLVMBasicBlockRef LLVMGetSuccessor(LLVMValueRef Term, unsigned i); /** * Update the specified successor to point at the provided block. * - * @see llvm::TerminatorInst::setSuccessor + * @see llvm::Instruction::setSuccessor */ void LLVMSetSuccessor(LLVMValueRef Term, unsigned i, LLVMBasicBlockRef block); @@ -3427,6 +3521,35 @@ LLVMValueRef LLVMBuildNot(LLVMBuilderRef, LLVMValueRef V, const char *Name); LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef, LLVMTypeRef Ty, const char *Name); LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef, LLVMTypeRef Ty, LLVMValueRef Val, const char *Name); + +/** + * Creates and inserts a memset to the specified pointer and the + * specified value. + * + * @see llvm::IRBuilder::CreateMemSet() + */ +LLVMValueRef LLVMBuildMemSet(LLVMBuilderRef B, LLVMValueRef Ptr, + LLVMValueRef Val, LLVMValueRef Len, + unsigned Align); +/** + * Creates and inserts a memcpy between the specified pointers. + * + * @see llvm::IRBuilder::CreateMemCpy() + */ +LLVMValueRef LLVMBuildMemCpy(LLVMBuilderRef B, + LLVMValueRef Dst, unsigned DstAlign, + LLVMValueRef Src, unsigned SrcAlign, + LLVMValueRef Size); +/** + * Creates and inserts a memmove between the specified pointers.
+ * + * @see llvm::IRBuilder::CreateMemMove() + */ +LLVMValueRef LLVMBuildMemMove(LLVMBuilderRef B, + LLVMValueRef Dst, unsigned DstAlign, + LLVMValueRef Src, unsigned SrcAlign, + LLVMValueRef Size); + LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef, LLVMTypeRef Ty, const char *Name); LLVMValueRef LLVMBuildArrayAlloca(LLVMBuilderRef, LLVMTypeRef Ty, LLVMValueRef Val, const char *Name); diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h index 49ae6fee45f0138f75cc984149ff08a156e1f1fa..e8ebef9ab15d8a709046c167a18e80813288ede4 100644 --- a/include/llvm-c/ExecutionEngine.h +++ b/include/llvm-c/ExecutionEngine.h @@ -186,7 +186,7 @@ void LLVMDisposeMCJITMemoryManager(LLVMMCJITMemoryManagerRef MM); LLVMJITEventListenerRef LLVMCreateGDBRegistrationListener(void); LLVMJITEventListenerRef LLVMCreateIntelJITEventListener(void); -LLVMJITEventListenerRef LLVMCreateOprofileJITEventListener(void); +LLVMJITEventListenerRef LLVMCreateOProfileJITEventListener(void); LLVMJITEventListenerRef LLVMCreatePerfJITEventListener(void); /** diff --git a/include/llvm-c/OptRemarks.h b/include/llvm-c/OptRemarks.h new file mode 100644 index 0000000000000000000000000000000000000000..6a90394e711c57962c462f8b17eb85f86146cf1b --- /dev/null +++ b/include/llvm-c/OptRemarks.h @@ -0,0 +1,204 @@ +/*===-- llvm-c/OptRemarks.h - OptRemarks Public C Interface -------*- C -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header provides a public interface to an opt-remark library. *| +|* LLVM provides an implementation of this interface.
*| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_OPT_REMARKS_H +#define LLVM_C_OPT_REMARKS_H + +#include "llvm-c/Core.h" +#include "llvm-c/Types.h" +#ifdef __cplusplus +#include +extern "C" { +#else +#include +#endif /* !defined(__cplusplus) */ + +/** + * @defgroup LLVMCOPTREMARKS OptRemarks + * @ingroup LLVMC + * + * @{ + */ + +#define OPT_REMARKS_API_VERSION 0 + +/** + * String containing a buffer and a length. The buffer is not guaranteed to be + * zero-terminated. + * + * \since OPT_REMARKS_API_VERSION=0 + */ +typedef struct { + const char *Str; + uint32_t Len; +} LLVMOptRemarkStringRef; + +/** + * DebugLoc containing File, Line and Column. + * + * \since OPT_REMARKS_API_VERSION=0 + */ +typedef struct { + // File: + LLVMOptRemarkStringRef SourceFile; + // Line: + uint32_t SourceLineNumber; + // Column: + uint32_t SourceColumnNumber; +} LLVMOptRemarkDebugLoc; + +/** + * Element of the "Args" list. The key might give more information about what + * are the semantics of the value, e.g. "Callee" will tell you that the value + * is a symbol that names a function. + * + * \since OPT_REMARKS_API_VERSION=0 + */ +typedef struct { + // e.g. "Callee" + LLVMOptRemarkStringRef Key; + // e.g. "malloc" + LLVMOptRemarkStringRef Value; + + // "DebugLoc": Optional + LLVMOptRemarkDebugLoc DebugLoc; +} LLVMOptRemarkArg; + +/** + * One remark entry. + * + * \since OPT_REMARKS_API_VERSION=0 + */ +typedef struct { + // e.g. !Missed, !Passed + LLVMOptRemarkStringRef RemarkType; + // "Pass": Required + LLVMOptRemarkStringRef PassName; + // "Name": Required + LLVMOptRemarkStringRef RemarkName; + // "Function": Required + LLVMOptRemarkStringRef FunctionName; + + // "DebugLoc": Optional + LLVMOptRemarkDebugLoc DebugLoc; + // "Hotness": Optional + uint32_t Hotness; + // "Args": Optional. It is an array of `NumArgs` elements.
+ uint32_t NumArgs; + LLVMOptRemarkArg *Args; +} LLVMOptRemarkEntry; + +typedef struct LLVMOptRemarkOpaqueParser *LLVMOptRemarkParserRef; + +/** + * Creates a remark parser that can be used to read and parse the buffer located + * in \p Buf of size \p Size. + * + * \p Buf cannot be NULL. + * + * This function should be paired with LLVMOptRemarkParserDispose() to avoid + * leaking resources. + * + * \since OPT_REMARKS_API_VERSION=0 + */ +extern LLVMOptRemarkParserRef LLVMOptRemarkParserCreate(const void *Buf, + uint64_t Size); + +/** + * Returns the next remark in the file. + * + * The value pointed to by the return value is invalidated by the next call to + * LLVMOptRemarkParserGetNext(). + * + * If the parser reaches the end of the buffer, the return value will be NULL. + * + * In the case of an error, the return value will be NULL, and: + * + * 1) LLVMOptRemarkParserHasError() will return `1`. + * + * 2) LLVMOptRemarkParserGetErrorMessage() will return a descriptive error + * message. + * + * An error may occur if: + * + * 1) An argument is invalid. + * + * 2) There is a YAML parsing error. This type of error aborts parsing + * immediately and returns `1`. It can occur on malformed YAML. + * + * 3) Remark parsing error. If this type of error occurs, the parser won't call + * the handler and will continue to the next one. It can occur on malformed + * remarks, like missing or extra fields in the file. 
+ * + * Here is a quick example of the usage: + * + * ``` + * LLVMOptRemarkParserRef Parser = LLVMOptRemarkParserCreate(Buf, Size); + * LLVMOptRemarkEntry *Remark = NULL; + * while ((Remark = LLVMOptRemarkParserGetNext(Parser))) { + * // use Remark + * } + * bool HasError = LLVMOptRemarkParserHasError(Parser); + * LLVMOptRemarkParserDispose(Parser); + * ``` + * + * \since OPT_REMARKS_API_VERSION=0 + */ +extern LLVMOptRemarkEntry * +LLVMOptRemarkParserGetNext(LLVMOptRemarkParserRef Parser); + +/** + * Returns `1` if the parser encountered an error while parsing the buffer. + * + * \since OPT_REMARKS_API_VERSION=0 + */ +extern LLVMBool LLVMOptRemarkParserHasError(LLVMOptRemarkParserRef Parser); + +/** + * Returns a null-terminated string containing an error message. + * + * In case of no error, the result is `NULL`. + * + * The memory of the string is bound to the lifetime of \p Parser. If + * LLVMOptRemarkParserDispose() is called, the memory of the string will be + * released. + * + * \since OPT_REMARKS_API_VERSION=0 + */ +extern const char * +LLVMOptRemarkParserGetErrorMessage(LLVMOptRemarkParserRef Parser); + +/** + * Releases all the resources used by \p Parser. + * + * \since OPT_REMARKS_API_VERSION=0 + */ +extern void LLVMOptRemarkParserDispose(LLVMOptRemarkParserRef Parser); + +/** + * Returns the version of the opt-remarks dylib. + * + * \since OPT_REMARKS_API_VERSION=0 + */ +extern uint32_t LLVMOptRemarkVersion(void); + +/** + * @} // endgroup LLVMCOPTREMARKS + */ + +#ifdef __cplusplus +} +#endif /* !defined(__cplusplus) */ + +#endif /* LLVM_C_OPT_REMARKS_H */ diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h index 5c59af4c04ba6a22737cc4109a95c9a875d26ab2..52ed183c78aed1fa2f12eaac1b355d25d13b52aa 100644 --- a/include/llvm/ADT/APFloat.h +++ b/include/llvm/ADT/APFloat.h @@ -1243,6 +1243,32 @@ inline APFloat maxnum(const APFloat &A, const APFloat &B) { return (A.compare(B) == APFloat::cmpLessThan) ?
B : A; } +/// Implements IEEE 754-2018 minimum semantics. Returns the smaller of 2 +/// arguments, propagating NaNs and treating -0 as less than +0. +LLVM_READONLY +inline APFloat minimum(const APFloat &A, const APFloat &B) { + if (A.isNaN()) + return A; + if (B.isNaN()) + return B; + if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative())) + return A.isNegative() ? A : B; + return (B.compare(A) == APFloat::cmpLessThan) ? B : A; +} + +/// Implements IEEE 754-2018 maximum semantics. Returns the larger of 2 +/// arguments, propagating NaNs and treating -0 as less than +0. +LLVM_READONLY +inline APFloat maximum(const APFloat &A, const APFloat &B) { + if (A.isNaN()) + return A; + if (B.isNaN()) + return B; + if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative())) + return A.isNegative() ? B : A; + return (A.compare(B) == APFloat::cmpLessThan) ? B : A; +} + } // namespace llvm #undef APFLOAT_DISPATCH_ON_SEMANTICS diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h index 380f1db0d04adbf8c28424412cb291fa77fbca62..1f50502fff92bad73eec7b41fc3fec39fd501429 100644 --- a/include/llvm/ADT/DenseMap.h +++ b/include/llvm/ADT/DenseMap.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -38,6 +39,34 @@ namespace detail { // implementation without requiring two members. template struct DenseMapPair : public std::pair { + + // FIXME: Switch to inheriting constructors when we drop support for older + // clang versions. + // NOTE: This default constructor is declared with '{}' rather than + // '= default' to work around a separate bug in clang-3.8. This can + // also go when we switch to inheriting constructors. 
+ DenseMapPair() {} + + DenseMapPair(const KeyT &Key, const ValueT &Value) + : std::pair(Key, Value) {} + + DenseMapPair(KeyT &&Key, ValueT &&Value) + : std::pair(std::move(Key), std::move(Value)) {} + + template + DenseMapPair(AltKeyT &&AltKey, AltValueT &&AltValue, + typename std::enable_if< + std::is_convertible::value && + std::is_convertible::value>::type * = 0) + : std::pair(std::forward(AltKey), + std::forward(AltValue)) {} + + template + DenseMapPair(AltPairT &&AltPair, + typename std::enable_if>::value>::type * = 0) + : std::pair(std::forward(AltPair)) {} + KeyT &getFirst() { return std::pair::first; } const KeyT &getFirst() const { return std::pair::first; } ValueT &getSecond() { return std::pair::second; } @@ -46,9 +75,10 @@ struct DenseMapPair : public std::pair { } // end namespace detail -template < - typename KeyT, typename ValueT, typename KeyInfoT = DenseMapInfo, - typename Bucket = detail::DenseMapPair, bool IsConst = false> +template , + typename Bucket = llvm::detail::DenseMapPair, + bool IsConst = false> class DenseMapIterator; template +bool operator==( + const DenseMapBase &LHS, + const DenseMapBase &RHS) { + if (LHS.size() != RHS.size()) + return false; + + for (auto &KV : LHS) { + auto I = RHS.find(KV.first); + if (I == RHS.end() || I->second != KV.second) + return false; + } + + return true; +} + +/// Inequality comparison for DenseMap. +/// +/// Equivalent to !(LHS == RHS). See operator== for performance notes. 
+template +bool operator!=( + const DenseMapBase &LHS, + const DenseMapBase &RHS) { + return !(LHS == RHS); +} + template , - typename BucketT = detail::DenseMapPair> + typename BucketT = llvm::detail::DenseMapPair> class DenseMap : public DenseMapBase, KeyT, ValueT, KeyInfoT, BucketT> { friend class DenseMapBase; @@ -676,6 +740,11 @@ public: this->insert(I, E); } + DenseMap(std::initializer_list Vals) { + init(Vals.size()); + this->insert(Vals.begin(), Vals.end()); + } + ~DenseMap() { this->destroyAll(); operator delete(Buckets); @@ -798,7 +867,7 @@ private: template , - typename BucketT = detail::DenseMapPair> + typename BucketT = llvm::detail::DenseMapPair> class SmallDenseMap : public DenseMapBase< SmallDenseMap, KeyT, diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h index 52fe4adb5bd3f9eaf64591c3b71f49ec937607e3..e85a38587e41dcd97366b64dc0eaeac4a0c97bca 100644 --- a/include/llvm/ADT/DenseSet.h +++ b/include/llvm/ADT/DenseSet.h @@ -16,6 +16,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/type_traits.h" #include #include @@ -67,7 +68,7 @@ public: explicit DenseSetImpl(unsigned InitialReserve = 0) : TheMap(InitialReserve) {} DenseSetImpl(std::initializer_list Elems) - : DenseSetImpl(Elems.size()) { + : DenseSetImpl(PowerOf2Ceil(Elems.size())) { insert(Elems.begin(), Elems.end()); } @@ -214,6 +215,34 @@ public: } }; +/// Equality comparison for DenseSet. +/// +/// Iterates over elements of LHS confirming that each element is also a member +/// of RHS, and that RHS contains no additional values. +/// Equivalent to N calls to RHS.count. Amortized complexity is linear, worst +/// case is O(N^2) (if every hash collides). 
+template +bool operator==(const DenseSetImpl &LHS, + const DenseSetImpl &RHS) { + if (LHS.size() != RHS.size()) + return false; + + for (auto &E : LHS) + if (!RHS.count(E)) + return false; + + return true; +} + +/// Inequality comparison for DenseSet. +/// +/// Equivalent to !(LHS == RHS). See operator== for performance notes. +template +bool operator!=(const DenseSetImpl &LHS, + const DenseSetImpl &RHS) { + return !(LHS == RHS); +} + } // end namespace detail /// Implements a dense probed hash-table based set. diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h index dc8a9b6e78b20961c694d173c93db36fc4198471..d77b12228cb15b219a95416dc342c55b319e26a4 100644 --- a/include/llvm/ADT/PostOrderIterator.h +++ b/include/llvm/ADT/PostOrderIterator.h @@ -296,12 +296,15 @@ class ReversePostOrderTraversal { public: using rpo_iterator = typename std::vector::reverse_iterator; + using const_rpo_iterator = typename std::vector::const_reverse_iterator; ReversePostOrderTraversal(GraphT G) { Initialize(GT::getEntryNode(G)); } // Because we want a reverse post order, use reverse iterators from the vector rpo_iterator begin() { return Blocks.rbegin(); } + const_rpo_iterator begin() const { return Blocks.crbegin(); } rpo_iterator end() { return Blocks.rend(); } + const_rpo_iterator end() const { return Blocks.crend(); } }; } // end namespace llvm diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index c209c4aede9bb67047d8b12369bdf39ebc48a05b..4a93ee55e76dcae13dd5b1c1aea7f27efc8fd0fe 100644 --- a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -195,6 +195,12 @@ void adl_swap(T &&lhs, T &&rhs) noexcept( adl_detail::adl_swap(std::forward(lhs), std::forward(rhs)); } +/// Test whether \p RangeOrContainer is empty. Similar to C++17 std::empty. 
+template +constexpr bool empty(const T &RangeOrContainer) { + return adl_begin(RangeOrContainer) == adl_end(RangeOrContainer); +} + // mapped_iterator - This is a simple iterator adapter that causes a function to // be applied whenever operator* is invoked on the iterator. diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h index 4cbf40c76805eeb8475c98b9e51d3a89a13980e8..84e73bcbace871d48d47c3b3124fe5bc49cecaac 100644 --- a/include/llvm/ADT/SparseBitVector.h +++ b/include/llvm/ADT/SparseBitVector.h @@ -261,21 +261,33 @@ class SparseBitVector { BITWORD_SIZE = SparseBitVectorElement::BITWORD_SIZE }; - // Pointer to our current Element. - ElementListIter CurrElementIter; ElementList Elements; + // Pointer to our current Element. This has no visible effect on the external + // state of a SparseBitVector, it's just used to improve performance in the + // common case of testing/modifying bits with similar indices. + mutable ElementListIter CurrElementIter; // This is like std::lower_bound, except we do linear searching from the // current position. - ElementListIter FindLowerBound(unsigned ElementIndex) { + ElementListIter FindLowerBoundImpl(unsigned ElementIndex) const { + + // We cache a non-const iterator so we're forced to resort to const_cast to + // get the begin/end in the case where 'this' is const. To avoid duplication + // of code with the only difference being whether the const cast is present + // 'this' is always const in this particular function and we sort out the + // difference in FindLowerBound and FindLowerBoundConst. + ElementListIter Begin = + const_cast *>(this)->Elements.begin(); + ElementListIter End = + const_cast *>(this)->Elements.end(); if (Elements.empty()) { - CurrElementIter = Elements.begin(); - return Elements.begin(); + CurrElementIter = Begin; + return CurrElementIter; } // Make sure our current iterator is valid. 
- if (CurrElementIter == Elements.end()) + if (CurrElementIter == End) --CurrElementIter; // Search from our current iterator, either backwards or forwards, @@ -284,17 +296,23 @@ class SparseBitVector { if (CurrElementIter->index() == ElementIndex) { return ElementIter; } else if (CurrElementIter->index() > ElementIndex) { - while (ElementIter != Elements.begin() + while (ElementIter != Begin && ElementIter->index() > ElementIndex) --ElementIter; } else { - while (ElementIter != Elements.end() && + while (ElementIter != End && ElementIter->index() < ElementIndex) ++ElementIter; } CurrElementIter = ElementIter; return ElementIter; } + ElementListConstIter FindLowerBoundConst(unsigned ElementIndex) const { + return FindLowerBoundImpl(ElementIndex); + } + ElementListIter FindLowerBound(unsigned ElementIndex) { + return FindLowerBoundImpl(ElementIndex); + } // Iterator to walk set bits in the bitmap. This iterator is a lot uglier // than it would be, in order to be efficient. @@ -423,22 +441,12 @@ class SparseBitVector { public: using iterator = SparseBitVectorIterator; - SparseBitVector() { - CurrElementIter = Elements.begin(); - } + SparseBitVector() : Elements(), CurrElementIter(Elements.begin()) {} - // SparseBitVector copy ctor. - SparseBitVector(const SparseBitVector &RHS) { - ElementListConstIter ElementIter = RHS.Elements.begin(); - while (ElementIter != RHS.Elements.end()) { - Elements.push_back(SparseBitVectorElement(*ElementIter)); - ++ElementIter; - } - - CurrElementIter = Elements.begin (); - } - - ~SparseBitVector() = default; + SparseBitVector(const SparseBitVector &RHS) + : Elements(RHS.Elements), CurrElementIter(Elements.begin()) {} + SparseBitVector(SparseBitVector &&RHS) + : Elements(std::move(RHS.Elements)), CurrElementIter(Elements.begin()) {} // Clear. 
void clear() { @@ -450,26 +458,23 @@ public: if (this == &RHS) return *this; - Elements.clear(); - - ElementListConstIter ElementIter = RHS.Elements.begin(); - while (ElementIter != RHS.Elements.end()) { - Elements.push_back(SparseBitVectorElement(*ElementIter)); - ++ElementIter; - } - - CurrElementIter = Elements.begin (); - + Elements = RHS.Elements; + CurrElementIter = Elements.begin(); + return *this; + } + SparseBitVector &operator=(SparseBitVector &&RHS) { + Elements = std::move(RHS.Elements); + CurrElementIter = Elements.begin(); return *this; } // Test, Reset, and Set a bit in the bitmap. - bool test(unsigned Idx) { + bool test(unsigned Idx) const { if (Elements.empty()) return false; unsigned ElementIndex = Idx / ElementSize; - ElementListIter ElementIter = FindLowerBound(ElementIndex); + ElementListConstIter ElementIter = FindLowerBoundConst(ElementIndex); // If we can't find an element that is supposed to contain this bit, there // is nothing more to do. diff --git a/include/llvm/ADT/iterator.h b/include/llvm/ADT/iterator.h index cb40fc1781dbc053194ff34fbc01a763a2381058..7f7ed69a005447c19177a6a599c5f577743a4f75 100644 --- a/include/llvm/ADT/iterator.h +++ b/include/llvm/ADT/iterator.h @@ -202,9 +202,7 @@ template < typename ReferenceT = typename std::conditional< std::is_same::value_type>::value, - typename std::iterator_traits::reference, T &>::type, - // Don't provide these, they are mostly to act as aliases below. - typename WrappedTraitsT = std::iterator_traits> + typename std::iterator_traits::reference, T &>::type> class iterator_adaptor_base : public iterator_facade_base { diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index be3496bbd95518a7b535031baee0f8e4e7f2516c..2efcd9dafa195ad64ad7fa13bd9ce6527f85b9ed 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -335,8 +335,7 @@ public: /// A convenience wrapper around the primary \c alias interface. 
AliasResult alias(const Value *V1, const Value *V2) { - return alias(V1, MemoryLocation::UnknownSize, V2, - MemoryLocation::UnknownSize); + return alias(V1, LocationSize::unknown(), V2, LocationSize::unknown()); } /// A trivial helper function to check to see if the specified pointers are @@ -1075,6 +1074,29 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; }; +/// A wrapper pass for external alias analyses. This just squirrels away the +/// callback used to run any analyses and register their results. +struct ExternalAAWrapperPass : ImmutablePass { + using CallbackT = std::function; + + CallbackT CB; + + static char ID; + + ExternalAAWrapperPass() : ImmutablePass(ID) { + initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry()); + } + + explicit ExternalAAWrapperPass(CallbackT CB) + : ImmutablePass(ID), CB(std::move(CB)) { + initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } +}; + FunctionPass *createAAResultsWrapperPass(); /// A wrapper pass around a callback which can be used to populate the diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h index cf4981d1eb279bd0af74a07a5b50a9fa176939e3..7ed5cd5c4734601857b7b5fbc82f4a487ab4fb04 100644 --- a/include/llvm/Analysis/AliasSetTracker.h +++ b/include/llvm/Analysis/AliasSetTracker.h @@ -389,10 +389,6 @@ public: /// set is returned. AliasSet &getAliasSetFor(const MemoryLocation &MemLoc); - /// Return true if the specified instruction "may" (or must) alias one of the - /// members in any of the sets. - bool containsUnknown(const Instruction *I) const; - /// Return the underlying alias analysis object used by this tracker. 
AliasAnalysis &getAliasAnalysis() const { return AA; } @@ -441,12 +437,7 @@ private: return *Entry; } - AliasSet &addPointer(Value *P, LocationSize Size, const AAMDNodes &AAInfo, - AliasSet::AccessLattice E); - AliasSet &addPointer(MemoryLocation Loc, - AliasSet::AccessLattice E) { - return addPointer(const_cast(Loc.Ptr), Loc.Size, Loc.AATags, E); - } + AliasSet &addPointer(MemoryLocation Loc, AliasSet::AccessLattice E); AliasSet *mergeAliasSetsForPointer(const Value *Ptr, LocationSize Size, const AAMDNodes &AAInfo); diff --git a/include/llvm/Analysis/CFG.h b/include/llvm/Analysis/CFG.h index cccdd1637411852cdd1cc840b80d90cb55538913..caae0b6e2a8ffaba87a4ab28e4fd231230133dc8 100644 --- a/include/llvm/Analysis/CFG.h +++ b/include/llvm/Analysis/CFG.h @@ -25,7 +25,6 @@ class DominatorTree; class Function; class Instruction; class LoopInfo; -class TerminatorInst; /// Analyze the specified function to find all of the loop backedges in the /// function and return them. This is a relatively cheap (compared to @@ -46,7 +45,7 @@ unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ); /// edges from a block with multiple successors to a block with multiple /// predecessors. /// -bool isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, +bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges = false); /// Determine whether instruction 'To' is reachable from 'From', diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h index a4b642b9ea3f51db01d371d1ee6733cf5addca77..5996dd90bcfd5a5c0f6ade0f806263e68c3e6d2d 100644 --- a/include/llvm/Analysis/CFGPrinter.h +++ b/include/llvm/Analysis/CFGPrinter.h @@ -150,7 +150,7 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { /// Display the raw branch weights from PGO. 
std::string getEdgeAttributes(const BasicBlock *Node, succ_const_iterator I, const Function *F) { - const TerminatorInst *TI = Node->getTerminator(); + const Instruction *TI = Node->getTerminator(); if (TI->getNumSuccessors() == 1) return ""; diff --git a/include/llvm/Analysis/DivergenceAnalysis.h b/include/llvm/Analysis/DivergenceAnalysis.h new file mode 100644 index 0000000000000000000000000000000000000000..9fadf52288bc87eb38a043a8b71a3abaebd9835d --- /dev/null +++ b/include/llvm/Analysis/DivergenceAnalysis.h @@ -0,0 +1,178 @@ +//===- llvm/Analysis/DivergenceAnalysis.h - Divergence Analysis -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// \file +// The divergence analysis determines which instructions and branches are +// divergent given a set of divergent source instructions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H +#define LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H + +#include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/SyncDependenceAnalysis.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" +#include + +namespace llvm { +class Module; +class Value; +class Instruction; +class Loop; +class raw_ostream; +class TargetTransformInfo; + +/// \brief Generic divergence analysis for reducible CFGs. +/// +/// This analysis propagates divergence in a data-parallel context from sources +/// of divergence to all users. It requires reducible CFGs. All assignments +/// should be in SSA form. +class DivergenceAnalysis { +public: + /// \brief This instance will analyze the whole function \p F or the loop \p + /// RegionLoop. + /// + /// \param RegionLoop if non-null the analysis is restricted to \p RegionLoop. + /// Otherwise the whole function is analyzed. 
+ /// \param IsLCSSAForm whether the analysis may assume that the IR in the + /// region is in LCSSA form. + DivergenceAnalysis(const Function &F, const Loop *RegionLoop, + const DominatorTree &DT, const LoopInfo &LI, + SyncDependenceAnalysis &SDA, bool IsLCSSAForm); + + /// \brief The loop that defines the analyzed region (if any). + const Loop *getRegionLoop() const { return RegionLoop; } + const Function &getFunction() const { return F; } + + /// \brief Whether \p BB is part of the region. + bool inRegion(const BasicBlock &BB) const; + /// \brief Whether \p I is part of the region. + bool inRegion(const Instruction &I) const; + + /// \brief Mark \p UniVal as a value that is always uniform. + void addUniformOverride(const Value &UniVal); + + /// \brief Mark \p DivVal as a value that is always divergent. + void markDivergent(const Value &DivVal); + + /// \brief Propagate divergence to all instructions in the region. + /// Divergence is seeded by calls to \p markDivergent. + void compute(); + + /// \brief Whether any value was marked or analyzed to be divergent. + bool hasDetectedDivergence() const { return !DivergentValues.empty(); } + + /// \brief Whether \p Val will always return a uniform value regardless of its + /// operands + bool isAlwaysUniform(const Value &Val) const; + + /// \brief Whether \p Val is a divergent value + bool isDivergent(const Value &Val) const; + + void print(raw_ostream &OS, const Module *) const; + +private: + bool updateTerminator(const Instruction &Term) const; + bool updatePHINode(const PHINode &Phi) const; + + /// \brief Computes whether \p Inst is divergent based on the + /// divergence of its operands. + /// + /// \returns Whether \p Inst is divergent. + /// + /// This should only be called for non-phi, non-terminator instructions. + bool updateNormalInstruction(const Instruction &Inst) const; + + /// \brief Mark users of live-out values as divergent. + /// + /// \param LoopHeader the header of the divergent loop.
+ /// + /// Marks all users of live-out values of the loop headed by \p LoopHeader + /// as divergent and puts them on the worklist. + void taintLoopLiveOuts(const BasicBlock &LoopHeader); + + /// \brief Push all users of \p I (in the region) to the worklist + void pushUsers(const Value &I); + + /// \brief Push all phi nodes in \p Block to the worklist + void pushPHINodes(const BasicBlock &Block); + + /// \brief Mark \p Block as join divergent + /// + /// A block is join divergent if two threads may reach it from different + /// incoming blocks at the same time. + void markBlockJoinDivergent(const BasicBlock &Block) { + DivergentJoinBlocks.insert(&Block); + } + + /// \brief Whether \p Val is divergent when read in \p ObservingBlock. + bool isTemporalDivergent(const BasicBlock &ObservingBlock, + const Value &Val) const; + + /// \brief Whether \p Block is join divergent + /// + /// (see markBlockJoinDivergent). + bool isJoinDivergent(const BasicBlock &Block) const { + return DivergentJoinBlocks.find(&Block) != DivergentJoinBlocks.end(); + } + + /// \brief Propagate control-induced divergence to users (phi nodes and + /// instructions). + /// + /// \param JoinBlock is a divergent loop exit or join point of two disjoint + /// paths. + /// \returns Whether \p JoinBlock is a divergent loop exit of \p TermLoop. + bool propagateJoinDivergence(const BasicBlock &JoinBlock, + const Loop *TermLoop); + + /// \brief Propagate induced value divergence due to control divergence in \p + /// Term. + void propagateBranchDivergence(const Instruction &Term); + + /// \brief Propagate divergence caused by a divergent loop exit. + /// + /// \param ExitingLoop is a divergent loop. + void propagateLoopDivergence(const Loop &ExitingLoop); + +private: + const Function &F; + // If RegionLoop != nullptr, analysis is only performed within \p RegionLoop.
+ // Otherwise, analyze the whole function. + const Loop *RegionLoop; + + const DominatorTree &DT; + const LoopInfo &LI; + + // Recognized divergent loops + DenseSet DivergentLoops; + + // The SDA links divergent branches to divergent control-flow joins. + SyncDependenceAnalysis &SDA; + + // Use simplified code path for LCSSA form. + bool IsLCSSAForm; + + // Set of known-uniform values. + DenseSet UniformOverrides; + + // Blocks with joining divergent control from different predecessors. + DenseSet DivergentJoinBlocks; + + // Detected/marked divergent values. + DenseSet DivergentValues; + + // Internal worklist for divergence propagation. + std::vector Worklist; +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h index 529fb75bec9bc45d7751af49ff6ac9d2f1fd3586..4c270354b0c4fd72046276efc12271166377f066 100644 --- a/include/llvm/Analysis/InlineCost.h +++ b/include/llvm/Analysis/InlineCost.h @@ -46,7 +46,6 @@ const int IndirectCallThreshold = 100; const int CallPenalty = 25; const int LastCallToStaticBonus = 15000; const int ColdccPenalty = 2000; -const int NoreturnPenalty = 10000; /// Do not inline functions which allocate this many bytes on the stack /// when the caller is recursive. const unsigned TotalAllocaSizeRecursiveCaller = 1024; diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h index 86b402b2394f8bdc94fb20ff394808c80111b439..c59c86c499404536c640efa718ac01f21fdb9400 100644 --- a/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/include/llvm/Analysis/LoopAccessAnalysis.h @@ -564,10 +564,10 @@ public: /// Print the information about the memory accesses in the loop. void print(raw_ostream &OS, unsigned Depth = 0) const; - /// If the loop has any store of a variant value to an invariant address, then + /// If the loop has multiple stores to an invariant address, then /// return true, else return false.
- bool hasVariantStoreToLoopInvariantAddress() const { - return HasVariantStoreToLoopInvariantAddress; + bool hasMultipleStoresToLoopInvariantAddress() const { + return HasMultipleStoresToLoopInvariantAddress; } /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts @@ -620,8 +620,8 @@ private: /// Cache the result of analyzeLoop. bool CanVecMem; - /// Indicator that there is a store of a variant value to a uniform address. - bool HasVariantStoreToLoopInvariantAddress; + /// Indicator that there are multiple stores to a uniform address. + bool HasMultipleStoresToLoopInvariantAddress; /// The diagnostics report generated for the analysis. E.g. why we /// couldn't analyze the loop. diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index 1c40cffc7f673580c791b14ac35c54793b6a1c50..52340b0cb51ca4e70c6bb626f50aee60b6a44730 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -304,7 +304,7 @@ private: /// The maximum size of the dereferences of the pointer. /// /// May be UnknownSize if the sizes are unknown. - LocationSize Size = MemoryLocation::UnknownSize; + LocationSize Size = LocationSize::unknown(); /// The AA tags associated with dereferences of the pointer. /// /// The members may be null if there are no tags or conflicting tags. 
diff --git a/include/llvm/Analysis/MemoryLocation.h b/include/llvm/Analysis/MemoryLocation.h index 509efa2ca1dae6a313a5a62fc06175bc50a8292f..cf839c5a1eb8764cd1ac8cafcf52d45e62f846e1 100644 --- a/include/llvm/Analysis/MemoryLocation.h +++ b/include/llvm/Analysis/MemoryLocation.h @@ -239,7 +239,7 @@ public: } explicit MemoryLocation(const Value *Ptr = nullptr, - LocationSize Size = UnknownSize, + LocationSize Size = LocationSize::unknown(), const AAMDNodes &AATags = AAMDNodes()) : Ptr(Ptr), Size(Size), AATags(AATags) {} diff --git a/include/llvm/Analysis/MustExecute.h b/include/llvm/Analysis/MustExecute.h index 40a02735d1b7c51350e45738997c591abd572420..05c28d139889af4b3a1e3a88a11c580b55343640 100644 --- a/include/llvm/Analysis/MustExecute.h +++ b/include/llvm/Analysis/MustExecute.h @@ -19,6 +19,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/InstructionPrecedenceTracking.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" @@ -45,33 +46,30 @@ class Loop; /// loop were made and the info wasn't recomputed properly, the behavior of all /// methods except for computeLoopSafetyInfo is undefined. class LoopSafetyInfo { - bool MayThrow = false; // The current loop contains an instruction which - // may throw. - bool HeaderMayThrow = false; // Same as previous, but specific to loop header + // Used to update funclet bundle operands. + DenseMap BlockColors; - /// Collect all blocks from \p CurLoop which lie on all possible paths from - /// the header of \p CurLoop (inclusive) to BB (exclusive) into the set - /// \p Predecessors. If \p BB is the header, \p Predecessors will be empty. - void collectTransitivePredecessors( - const Loop *CurLoop, const BasicBlock *BB, - SmallPtrSetImpl &Predecessors) const; +protected: + /// Computes block colors. + void computeBlockColors(const Loop *CurLoop); public: - // Used to update funclet bundle operands. 
- DenseMap BlockColors; + /// Returns block colors map that is used to update funclet operand bundles. + const DenseMap &getBlockColors() const; - /// Returns true iff the header block of the loop for which this info is - /// calculated contains an instruction that may throw or otherwise exit - /// abnormally. - bool headerMayThrow() const; + /// Copy colors of block \p Old into the block \p New. + void copyColors(BasicBlock *New, BasicBlock *Old); + + /// Returns true iff the block \p BB potentially may throw exception. It can + /// be false-positive in cases when we want to avoid complex analysis. + virtual bool blockMayThrow(const BasicBlock *BB) const = 0; /// Returns true iff any block of the loop for which this info is contains an /// instruction that may throw or otherwise exit abnormally. - bool anyBlockMayThrow() const; + virtual bool anyBlockMayThrow() const = 0; /// Return true if we must reach the block \p BB under assumption that the - /// loop \p CurLoop is entered and no instruction throws or otherwise exits - /// abnormally. + /// loop \p CurLoop is entered. bool allLoopPathsLeadToBlock(const Loop *CurLoop, const BasicBlock *BB, const DominatorTree *DT) const; @@ -80,16 +78,80 @@ public: /// as argument. Updates safety information in LoopSafetyInfo argument. /// Note: This is defined to clear and reinitialize an already initialized /// LoopSafetyInfo. Some callers rely on this fact. - void computeLoopSafetyInfo(Loop *); + virtual void computeLoopSafetyInfo(const Loop *CurLoop) = 0; + + /// Returns true if the instruction in a loop is guaranteed to execute at + /// least once (under the assumption that the loop is entered). 
+ virtual bool isGuaranteedToExecute(const Instruction &Inst, + const DominatorTree *DT, + const Loop *CurLoop) const = 0; LoopSafetyInfo() = default; + + virtual ~LoopSafetyInfo() = default; }; -/// Returns true if the instruction in a loop is guaranteed to execute at least -/// once (under the assumption that the loop is entered). -bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, - const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo); + +/// Simple and conservative implementation of LoopSafetyInfo that can give +/// false-positive answers to its queries in order to avoid complicated +/// analysis. +class SimpleLoopSafetyInfo: public LoopSafetyInfo { + bool MayThrow = false; // The current loop contains an instruction which + // may throw. + bool HeaderMayThrow = false; // Same as previous, but specific to loop header + +public: + virtual bool blockMayThrow(const BasicBlock *BB) const; + + virtual bool anyBlockMayThrow() const; + + virtual void computeLoopSafetyInfo(const Loop *CurLoop); + + virtual bool isGuaranteedToExecute(const Instruction &Inst, + const DominatorTree *DT, + const Loop *CurLoop) const; + + SimpleLoopSafetyInfo() : LoopSafetyInfo() {}; + + virtual ~SimpleLoopSafetyInfo() {}; +}; + +/// This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to +/// give precise answers on "may throw" queries. This implementation uses cache +/// that should be invalidated by calling the methods insertInstructionTo and +/// removeInstruction whenever we modify a basic block's contents by adding or +/// removing instructions. +class ICFLoopSafetyInfo: public LoopSafetyInfo { + bool MayThrow = false; // The current loop contains an instruction which + // may throw. + // Contains information about implicit control flow in this loop's blocks. 
+ mutable ImplicitControlFlowTracking ICF; + +public: + virtual bool blockMayThrow(const BasicBlock *BB) const; + + virtual bool anyBlockMayThrow() const; + + virtual void computeLoopSafetyInfo(const Loop *CurLoop); + + virtual bool isGuaranteedToExecute(const Instruction &Inst, + const DominatorTree *DT, + const Loop *CurLoop) const; + + /// Inform the safety info that we are planning to insert a new instruction + /// into the basic block \p BB. It will make all cache updates to keep it + /// correct after this insertion. + void insertInstructionTo(const BasicBlock *BB); + + /// Inform safety info that we are planning to remove the instruction \p Inst + /// from its block. It will make all cache updates to keep it correct after + /// this removal. + void removeInstruction(const Instruction *Inst); + + ICFLoopSafetyInfo(DominatorTree *DT) : LoopSafetyInfo(), ICF(DT) {}; + + virtual ~ICFLoopSafetyInfo() {}; +}; } diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 89918e3c205bf00a6ebef83a8762ca54d907a7c0..8f4200b07e5c7bd3a09fa7ccfa5c7c62ed5febef 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -1833,6 +1833,10 @@ private: const SCEV *getOrCreateMulExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags); + // Get addrec expr already created or create a new one. + const SCEV *getOrCreateAddRecExpr(SmallVectorImpl &Ops, + const Loop *L, SCEV::NoWrapFlags Flags); + /// Return x if \p Val is f(x) where f is a 1-1 function. 
const SCEV *stripInjectiveFunctions(const SCEV *Val) const; diff --git a/include/llvm/Analysis/SparsePropagation.h b/include/llvm/Analysis/SparsePropagation.h index 04e94f7cd5279448ffb930bbe491cc2b6216e19e..02a2e64268b7f91d1130e7cb278978414557d46c 100644 --- a/include/llvm/Analysis/SparsePropagation.h +++ b/include/llvm/Analysis/SparsePropagation.h @@ -189,12 +189,12 @@ private: /// getFeasibleSuccessors - Return a vector of booleans to indicate which /// successors are reachable from a given terminator instruction. - void getFeasibleSuccessors(TerminatorInst &TI, SmallVectorImpl &Succs, + void getFeasibleSuccessors(Instruction &TI, SmallVectorImpl &Succs, bool AggressiveUndef); void visitInst(Instruction &I); void visitPHINode(PHINode &I); - void visitTerminatorInst(TerminatorInst &TI); + void visitTerminator(Instruction &TI); }; //===----------------------------------------------------------------------===// @@ -286,7 +286,7 @@ void SparseSolver::markEdgeExecutable( template void SparseSolver::getFeasibleSuccessors( - TerminatorInst &TI, SmallVectorImpl &Succs, bool AggressiveUndef) { + Instruction &TI, SmallVectorImpl &Succs, bool AggressiveUndef) { Succs.resize(TI.getNumSuccessors()); if (TI.getNumSuccessors() == 0) return; @@ -374,7 +374,7 @@ template bool SparseSolver::isEdgeFeasible( BasicBlock *From, BasicBlock *To, bool AggressiveUndef) { SmallVector SuccFeasible; - TerminatorInst *TI = From->getTerminator(); + Instruction *TI = From->getTerminator(); getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) @@ -385,8 +385,8 @@ bool SparseSolver::isEdgeFeasible( } template -void SparseSolver::visitTerminatorInst( - TerminatorInst &TI) { +void SparseSolver::visitTerminator( + Instruction &TI) { SmallVector SuccFeasible; getFeasibleSuccessors(TI, SuccFeasible, true); @@ -465,8 +465,8 @@ void SparseSolver::visitInst(Instruction &I) { if (ChangedValue.second != LatticeFunc->getUntrackedVal()) 
UpdateState(ChangedValue.first, ChangedValue.second); - if (TerminatorInst *TI = dyn_cast(&I)) - visitTerminatorInst(*TI); + if (I.isTerminator()) + visitTerminator(I); } template diff --git a/include/llvm/Analysis/SyncDependenceAnalysis.h b/include/llvm/Analysis/SyncDependenceAnalysis.h new file mode 100644 index 0000000000000000000000000000000000000000..df693d9d8e8c2b2cb1eb78c50b604819df997a2c --- /dev/null +++ b/include/llvm/Analysis/SyncDependenceAnalysis.h @@ -0,0 +1,86 @@ +//===- SyncDependenceAnalysis.h - Divergent Branch Dependence -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// \file +// This file defines the SyncDependenceAnalysis class, which computes for +// every divergent branch the set of phi nodes that the branch will make +// divergent. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H +#define LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/LoopInfo.h" +#include + +namespace llvm { + +class BasicBlock; +class DominatorTree; +class Loop; +class PostDominatorTree; + +using ConstBlockSet = SmallPtrSet; + +/// \brief Relates points of divergent control to join points in +/// reducible CFGs. +/// +/// This analysis relates points of divergent control to points of converging +/// divergent control. The analysis requires all loops to be reducible. 
+class SyncDependenceAnalysis { + void visitSuccessor(const BasicBlock &succBlock, const Loop *termLoop, + const BasicBlock *defBlock); + +public: + bool inRegion(const BasicBlock &BB) const; + + ~SyncDependenceAnalysis(); + SyncDependenceAnalysis(const DominatorTree &DT, const PostDominatorTree &PDT, + const LoopInfo &LI); + + /// \brief Computes divergent join points and loop exits caused by branch + /// divergence in \p Term. + /// + /// The set of blocks which are reachable by disjoint paths from \p Term. + /// The set also contains loop exits if there two disjoint paths: + /// one from \p Term to the loop exit and another from \p Term to the loop + /// header. Those exit blocks are added to the returned set. + /// If L is the parent loop of \p Term and an exit of L is in the returned + /// set then L is a divergent loop. + const ConstBlockSet &join_blocks(const Instruction &Term); + + /// \brief Computes divergent join points and loop exits (in the surrounding + /// loop) caused by the divergent loop exits of\p Loop. + /// + /// The set of blocks which are reachable by disjoint paths from the + /// loop exits of \p Loop. + /// This treats the loop as a single node in \p Loop's parent loop. + /// The returned set has the same properties as for join_blocks(TermInst&). 
+ const ConstBlockSet &join_blocks(const Loop &Loop); + +private: + static ConstBlockSet EmptyBlockSet; + + ReversePostOrderTraversal FuncRPOT; + const DominatorTree &DT; + const PostDominatorTree &PDT; + const LoopInfo &LI; + + std::map> CachedLoopExitJoins; + std::map> + CachedBranchJoins; +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H diff --git a/include/llvm/Analysis/TargetLibraryInfo.def b/include/llvm/Analysis/TargetLibraryInfo.def index f94debba9c52b8f46fc1c9f94ec98f0450d6acea..518a85ee1a016949d06ed95eea057a14fd5c0f66 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.def +++ b/include/llvm/Analysis/TargetLibraryInfo.def @@ -565,6 +565,30 @@ TLI_DEFINE_STRING_INTERNAL("cosl") /// char *ctermid(char *s); TLI_DEFINE_ENUM_INTERNAL(ctermid) TLI_DEFINE_STRING_INTERNAL("ctermid") +/// int execl(const char *path, const char *arg, ...); +TLI_DEFINE_ENUM_INTERNAL(execl) +TLI_DEFINE_STRING_INTERNAL("execl") +/// int execle(const char *file, const char *arg, ..., char * const envp[]); +TLI_DEFINE_ENUM_INTERNAL(execle) +TLI_DEFINE_STRING_INTERNAL("execle") +/// int execlp(const char *file, const char *arg, ...); +TLI_DEFINE_ENUM_INTERNAL(execlp) +TLI_DEFINE_STRING_INTERNAL("execlp") +/// int execv(const char *path, char *const argv[]); +TLI_DEFINE_ENUM_INTERNAL(execv) +TLI_DEFINE_STRING_INTERNAL("execv") +/// int execvP(const char *file, const char *search_path, char *const argv[]); +TLI_DEFINE_ENUM_INTERNAL(execvP) +TLI_DEFINE_STRING_INTERNAL("execvP") +/// int execve(const char *filename, char *const argv[], char *const envp[]); +TLI_DEFINE_ENUM_INTERNAL(execve) +TLI_DEFINE_STRING_INTERNAL("execve") +/// int execvp(const char *file, char *const argv[]); +TLI_DEFINE_ENUM_INTERNAL(execvp) +TLI_DEFINE_STRING_INTERNAL("execvp") +/// int execvpe(const char *file, char *const argv[], char *const envp[]); +TLI_DEFINE_ENUM_INTERNAL(execvpe) +TLI_DEFINE_STRING_INTERNAL("execvpe") /// double exp(double x); TLI_DEFINE_ENUM_INTERNAL(exp) 
TLI_DEFINE_STRING_INTERNAL("exp") @@ -709,6 +733,9 @@ TLI_DEFINE_STRING_INTERNAL("fopen") /// FILE *fopen64(const char *filename, const char *opentype) TLI_DEFINE_ENUM_INTERNAL(fopen64) TLI_DEFINE_STRING_INTERNAL("fopen64") +/// int fork(); +TLI_DEFINE_ENUM_INTERNAL(fork) +TLI_DEFINE_STRING_INTERNAL("fork") /// int fprintf(FILE *stream, const char *format, ...); TLI_DEFINE_ENUM_INTERNAL(fprintf) TLI_DEFINE_STRING_INTERNAL("fprintf") diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 18b5a5cf0e5c385400d40c52f0221f90f708b99a..eb0e0270157fd05f3eaf4b36f3aeb6e86b3fcd9d 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -587,6 +587,11 @@ public: /// Enable matching of interleaved access groups. bool enableInterleavedAccessVectorization() const; + /// Enable matching of interleaved access groups that contain predicated + /// accesses or gaps and therefore vectorized using masked + /// vector loads/stores. + bool enableMaskedInterleavedAccessVectorization() const; + /// Indicate that it is potentially unsafe to automatically vectorize /// floating-point operations because the semantics of vector and scalar /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math @@ -766,7 +771,9 @@ public: /// \return The cost of a shuffle instruction of kind Kind and of type Tp. /// The index and subtype parameters are used by the subvector insertion and - /// extraction shuffle kinds. + /// extraction shuffle kinds to show the insert/extract point and the type of + /// the subvector being inserted/extracted. + /// NOTE: For subvector extractions Tp represents the source type. int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0, Type *SubTp = nullptr) const; @@ -821,9 +828,13 @@ public: /// load allows gaps) /// \p Alignment is the alignment of the memory operation /// \p AddressSpace is address space of the pointer. 
+ /// \p UseMaskForCond indicates if the memory access is predicated. + /// \p UseMaskForGaps indicates if gaps should be masked. int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, - unsigned AddressSpace) const; + unsigned AddressSpace, + bool UseMaskForCond = false, + bool UseMaskForGaps = false) const; /// Calculate the cost of performing a vector reduction. /// @@ -1072,6 +1083,7 @@ public: virtual const MemCmpExpansionOptions *enableMemCmpExpansion( bool IsZeroCmp) const = 0; virtual bool enableInterleavedAccessVectorization() = 0; + virtual bool enableMaskedInterleavedAccessVectorization() = 0; virtual bool isFPVectorizationPotentiallyUnsafe() = 0; virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, @@ -1132,7 +1144,9 @@ public: unsigned Factor, ArrayRef Indices, unsigned Alignment, - unsigned AddressSpace) = 0; + unsigned AddressSpace, + bool UseMaskForCond = false, + bool UseMaskForGaps = false) = 0; virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) = 0; virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy, @@ -1346,6 +1360,9 @@ public: bool enableInterleavedAccessVectorization() override { return Impl.enableInterleavedAccessVectorization(); } + bool enableMaskedInterleavedAccessVectorization() override { + return Impl.enableMaskedInterleavedAccessVectorization(); + } bool isFPVectorizationPotentiallyUnsafe() override { return Impl.isFPVectorizationPotentiallyUnsafe(); } @@ -1471,9 +1488,11 @@ public: } int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, - unsigned AddressSpace) override { + unsigned AddressSpace, bool UseMaskForCond, + bool UseMaskForGaps) override { return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace); + Alignment, AddressSpace, + UseMaskForCond, UseMaskForGaps); } int 
getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) override { diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index e39fe66c0a479536a514cdbddc07fa24aebcd3e2..5e79c5cdfe0353ad4e6c89f5e32db0fcb8a8b433 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -160,6 +160,7 @@ public: case Intrinsic::invariant_end: case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: + case Intrinsic::is_constant: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::objectsize: @@ -313,6 +314,8 @@ public: bool enableInterleavedAccessVectorization() { return false; } + bool enableMaskedInterleavedAccessVectorization() { return false; } + bool isFPVectorizationPotentiallyUnsafe() { return false; } bool allowsMisalignedMemoryAccesses(LLVMContext &Context, @@ -450,8 +453,9 @@ public: unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, - unsigned Alignment, - unsigned AddressSpace) { + unsigned Alignment, unsigned AddressSpace, + bool UseMaskForCond = false, + bool UseMaskForGaps = false) { return 1; } diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h index 622d932f74fdb94533ad686353ea89fe1e08ee90..797260f439a04078787fad427f874067d3e96980 100644 --- a/include/llvm/Analysis/VectorUtils.h +++ b/include/llvm/Analysis/VectorUtils.h @@ -24,6 +24,7 @@ namespace llvm { template class ArrayRef; class DemandedBits; class GetElementPtrInst; +class InterleaveGroup; class Loop; class ScalarEvolution; class TargetTransformInfo; @@ -125,6 +126,35 @@ computeMinimumValueSizes(ArrayRef Blocks, /// This function always sets a (possibly null) value for each K in Kinds. Instruction *propagateMetadata(Instruction *I, ArrayRef VL); +/// Create a mask that filters the members of an interleave group where there +/// are gaps. 
+/// +/// For example, the mask for \p Group with interleave-factor 3 +/// and \p VF 4, that has only its first member present is: +/// +/// <1,0,0,1,0,0,1,0,0,1,0,0> +/// +/// Note: The result is a mask of 0's and 1's, as opposed to the other +/// create[*]Mask() utilities which create a shuffle mask (mask that +/// consists of indices). +Constant *createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF, + const InterleaveGroup &Group); + +/// Create a mask with replicated elements. +/// +/// This function creates a shuffle mask for replicating each of the \p VF +/// elements in a vector \p ReplicationFactor times. It can be used to +/// transform a mask of \p VF elements into a mask of +/// \p VF * \p ReplicationFactor elements used by a predicated +/// interleaved-group of loads/stores whose Interleaved-factor == +/// \p ReplicationFactor. +/// +/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: +/// +/// <0,0,0,1,1,1,2,2,2,3,3,3> +Constant *createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor, + unsigned VF); + /// Create an interleave shuffle mask. /// /// This function creates a shuffle mask for interleaving \p NumVecs vectors of @@ -293,6 +323,23 @@ public: propagateMetadata(NewInst, VL); } + /// Returns true if this Group requires a scalar iteration to handle gaps. + bool requiresScalarEpilogue() const { + // If the last member of the Group exists, then a scalar epilog is not + // needed for this group. + if (getMember(getFactor() - 1)) + return false; + + // We have a group with gaps. It therefore cannot be a group of stores, + // and it can't be a reversed access, because such groups get invalidated. + assert(!getMember(0)->mayWriteToMemory() && + "Group should have been invalidated"); + assert(!isReverse() && "Group should have been invalidated"); + + // This is a group of loads, with gaps, and without a last-member + return true; + } + private: unsigned Factor; // Interleave Factor. 
bool Reverse; @@ -328,20 +375,31 @@ public: InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, DominatorTree *DT, LoopInfo *LI, const LoopAccessInfo *LAI) - : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {} + : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {} + + ~InterleavedAccessInfo() { reset(); } - ~InterleavedAccessInfo() { + /// Analyze the interleaved accesses and collect them in interleave + /// groups. Substitute symbolic strides using \p Strides. + /// Consider also predicated loads/stores in the analysis if + /// \p EnableMaskedInterleavedGroup is true. + void analyzeInterleaving(bool EnableMaskedInterleavedGroup); + + /// Invalidate groups, e.g., in case all blocks in loop will be predicated + /// contrary to original assumption. Although we currently prevent group + /// formation for predicated accesses, we may be able to relax this limitation + /// in the future once we handle more complicated blocks. + void reset() { SmallPtrSet DelSet; // Avoid releasing a pointer twice. for (auto &I : InterleaveGroupMap) DelSet.insert(I.second); for (auto *Ptr : DelSet) delete Ptr; + InterleaveGroupMap.clear(); + RequiresScalarEpilogue = false; } - /// Analyze the interleaved accesses and collect them in interleave - /// groups. Substitute symbolic strides using \p Strides. - void analyzeInterleaving(); /// Check if \p Instr belongs to any interleave group. bool isInterleaved(Instruction *Instr) const { @@ -362,6 +420,11 @@ public: /// out-of-bounds requires a scalar epilogue iteration for correctness. bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; } + /// Invalidate groups that require a scalar epilogue (due to gaps). This can + /// happen when optimizing for size forbids a scalar epilogue, and the gap + /// cannot be filtered by masking the load/store. + void invalidateGroupsRequiringScalarEpilogue(); + private: /// A wrapper around ScalarEvolution, used to add runtime SCEV checks. 
/// Simplifies SCEV expressions in the context of existing SCEV assumptions. diff --git a/include/llvm/BinaryFormat/Dwarf.def b/include/llvm/BinaryFormat/Dwarf.def index 6b7a7412f4d92dfa43b54dfe59473556bccb945a..512cc64926db5a6e0692895a6af1660c0e9d6a8f 100644 --- a/include/llvm/BinaryFormat/Dwarf.def +++ b/include/llvm/BinaryFormat/Dwarf.def @@ -873,6 +873,7 @@ HANDLE_DWARF_SECTION(DebugTypes, ".debug_types", "debug-types") HANDLE_DWARF_SECTION(DebugLine, ".debug_line", "debug-line") HANDLE_DWARF_SECTION(DebugLineStr, ".debug_line_str", "debug-line-str") HANDLE_DWARF_SECTION(DebugLoc, ".debug_loc", "debug-loc") +HANDLE_DWARF_SECTION(DebugLoclists, ".debug_loclists", "debug-loclists") HANDLE_DWARF_SECTION(DebugFrame, ".debug_frame", "debug-frame") HANDLE_DWARF_SECTION(DebugMacro, ".debug_macro", "debug-macro") HANDLE_DWARF_SECTION(DebugNames, ".debug_names", "debug-names") diff --git a/include/llvm/BinaryFormat/ELF.h b/include/llvm/BinaryFormat/ELF.h index 2e778779117b110919e03c8df6793047758d54f5..ebbf830a60e9db9318dcc2d8a580da094250e74e 100644 --- a/include/llvm/BinaryFormat/ELF.h +++ b/include/llvm/BinaryFormat/ELF.h @@ -701,6 +701,7 @@ enum : unsigned { EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d, EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e, EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f, + EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031, // Reserved for AMDGCN-based processors. EF_AMDGPU_MACH_AMDGCN_RESERVED0 = 0x027, @@ -708,11 +709,14 @@ enum : unsigned { // First/last AMDGCN-based processors. EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600, - EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX906, + EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX909, - // Indicates if the xnack target feature is enabled for all code contained in - // the object. + // Indicates if the "xnack" target feature is enabled for all code contained + // in the object. EF_AMDGPU_XNACK = 0x100, + // Indicates if the "sram-ecc" target feature is enabled for all code + // contained in the object. 
+ EF_AMDGPU_SRAM_ECC = 0x200, }; // ELF Relocation types for AMDGPU @@ -725,6 +729,38 @@ enum { #include "ELFRelocs/BPF.def" }; +// MSP430 specific e_flags +enum : unsigned { + EF_MSP430_MACH_MSP430x11 = 11, + EF_MSP430_MACH_MSP430x11x1 = 110, + EF_MSP430_MACH_MSP430x12 = 12, + EF_MSP430_MACH_MSP430x13 = 13, + EF_MSP430_MACH_MSP430x14 = 14, + EF_MSP430_MACH_MSP430x15 = 15, + EF_MSP430_MACH_MSP430x16 = 16, + EF_MSP430_MACH_MSP430x20 = 20, + EF_MSP430_MACH_MSP430x22 = 22, + EF_MSP430_MACH_MSP430x23 = 23, + EF_MSP430_MACH_MSP430x24 = 24, + EF_MSP430_MACH_MSP430x26 = 26, + EF_MSP430_MACH_MSP430x31 = 31, + EF_MSP430_MACH_MSP430x32 = 32, + EF_MSP430_MACH_MSP430x33 = 33, + EF_MSP430_MACH_MSP430x41 = 41, + EF_MSP430_MACH_MSP430x42 = 42, + EF_MSP430_MACH_MSP430x43 = 43, + EF_MSP430_MACH_MSP430x44 = 44, + EF_MSP430_MACH_MSP430X = 45, + EF_MSP430_MACH_MSP430x46 = 46, + EF_MSP430_MACH_MSP430x47 = 47, + EF_MSP430_MACH_MSP430x54 = 54, +}; + +// ELF Relocation types for MSP430 +enum { +#include "ELFRelocs/MSP430.def" +}; + #undef ELF_RELOC // Section header. @@ -829,6 +865,8 @@ enum : unsigned { SHT_MIPS_DWARF = 0x7000001e, // DWARF debugging section. SHT_MIPS_ABIFLAGS = 0x7000002a, // ABI information. + SHT_MSP430_ATTRIBUTES = 0x70000003U, + SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type. SHT_LOUSER = 0x80000000, // Lowest type reserved for applications. SHT_HIUSER = 0xffffffff // Highest type reserved for applications. 
diff --git a/include/llvm/BinaryFormat/ELFRelocs/MSP430.def b/include/llvm/BinaryFormat/ELFRelocs/MSP430.def new file mode 100644 index 0000000000000000000000000000000000000000..96990abf2db4825c13271108ddb8e014703640c5 --- /dev/null +++ b/include/llvm/BinaryFormat/ELFRelocs/MSP430.def @@ -0,0 +1,16 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_MSP430_NONE, 0) +ELF_RELOC(R_MSP430_32, 1) +ELF_RELOC(R_MSP430_10_PCREL, 2) +ELF_RELOC(R_MSP430_16, 3) +ELF_RELOC(R_MSP430_16_PCREL, 4) +ELF_RELOC(R_MSP430_16_BYTE, 5) +ELF_RELOC(R_MSP430_16_PCREL_BYTE, 6) +ELF_RELOC(R_MSP430_2X_PCREL, 7) +ELF_RELOC(R_MSP430_RL_PCREL, 8) +ELF_RELOC(R_MSP430_8, 9) +ELF_RELOC(R_MSP430_SYM_DIFF, 10) diff --git a/include/llvm/BinaryFormat/Wasm.h b/include/llvm/BinaryFormat/Wasm.h index 44dd92ea90103422bdd812e2eae28bd804844046..3d25c9d15e4e7ff2fe73c716fd0425efa443bc06 100644 --- a/include/llvm/BinaryFormat/Wasm.h +++ b/include/llvm/BinaryFormat/Wasm.h @@ -214,6 +214,7 @@ enum : unsigned { enum : unsigned { WASM_LIMITS_FLAG_HAS_MAX = 0x1, + WASM_LIMITS_FLAG_IS_SHARED = 0x2, }; // Kind codes used in the custom "name" section diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index b460cdc0ba1e8c209f110cb5e280e470d35ddd09..224a41bc2b7aba965904741c4567ffa942f01ea6 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -80,6 +80,23 @@ private: using BaseT = TargetTransformInfoImplCRTPBase; using TTI = TargetTransformInfo; + /// Estimate a cost of Broadcast as an extract and sequence of insert + /// operations. + unsigned getBroadcastShuffleOverhead(Type *Ty) { + assert(Ty->isVectorTy() && "Can only shuffle vectors"); + unsigned Cost = 0; + // Broadcast cost is equal to the cost of extracting the zero'th element + // plus the cost of inserting it into every element of the result vector. 
+ Cost += static_cast(this)->getVectorInstrCost( + Instruction::ExtractElement, Ty, 0); + + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + Cost += static_cast(this)->getVectorInstrCost( + Instruction::InsertElement, Ty, i); + } + return Cost; + } + /// Estimate a cost of shuffle as a sequence of extract and insert /// operations. unsigned getPermuteShuffleOverhead(Type *Ty) { @@ -554,7 +571,10 @@ public: unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { switch (Kind) { + case TTI::SK_Broadcast: + return getBroadcastShuffleOverhead(Tp); case TTI::SK_Select: + case TTI::SK_Reverse: case TTI::SK_Transpose: case TTI::SK_PermuteSingleSrc: case TTI::SK_PermuteTwoSrc: @@ -783,8 +803,9 @@ public: unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, - unsigned Alignment, - unsigned AddressSpace) { + unsigned Alignment, unsigned AddressSpace, + bool UseMaskForCond = false, + bool UseMaskForGaps = false) { VectorType *VT = dyn_cast(VecTy); assert(VT && "Expect a vector type for interleaved memory op"); @@ -795,8 +816,13 @@ public: VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts); // Firstly, the cost of load/store operation. - unsigned Cost = static_cast(this)->getMemoryOpCost( - Opcode, VecTy, Alignment, AddressSpace); + unsigned Cost; + if (UseMaskForCond || UseMaskForGaps) + Cost = static_cast(this)->getMaskedMemoryOpCost( + Opcode, VecTy, Alignment, AddressSpace); + else + Cost = static_cast(this)->getMemoryOpCost(Opcode, VecTy, Alignment, + AddressSpace); // Legalize the vector type, and get the legalized and unlegalized type // sizes. 
@@ -892,6 +918,40 @@ public: ->getVectorInstrCost(Instruction::InsertElement, VT, i); } + if (!UseMaskForCond) + return Cost; + + Type *I8Type = Type::getInt8Ty(VT->getContext()); + VectorType *MaskVT = VectorType::get(I8Type, NumElts); + SubVT = VectorType::get(I8Type, NumSubElts); + + // The cost of shuffling the Mask is that of extracting all its elements + // and inserting each of them Factor times into the wide vector: + // + // E.g. an interleaved group with factor 3: + // %mask = icmp ult <8 x i32> %vec1, %vec2 + // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, + // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> + // The cost is estimated as extracting all mask elements from the <8xi1> + // mask vector and inserting them factor times into the <24xi1> shuffled + // mask vector. + for (unsigned i = 0; i < NumSubElts; i++) + Cost += static_cast(this)->getVectorInstrCost( + Instruction::ExtractElement, SubVT, i); + + for (unsigned i = 0; i < NumElts; i++) + Cost += static_cast(this)->getVectorInstrCost( + Instruction::InsertElement, MaskVT, i); + + // The Gaps mask is invariant and created outside the loop, therefore the + // cost of creating it is not accounted for here. However if we have both + // a MaskForGaps and some other mask that guards the execution of the + // memory access, we need to account for the cost of And-ing the two masks + // inside the loop. + if (UseMaskForGaps) + Cost += static_cast(this)->getArithmeticInstrCost( + BinaryOperator::And, MaskVT); + return Cost; } @@ -1042,12 +1102,12 @@ public: case Intrinsic::minnum: ISDs.push_back(ISD::FMINNUM); if (FMF.noNaNs()) - ISDs.push_back(ISD::FMINNAN); + ISDs.push_back(ISD::FMINIMUM); break; case Intrinsic::maxnum: ISDs.push_back(ISD::FMAXNUM); if (FMF.noNaNs()) - ISDs.push_back(ISD::FMAXNAN); + ISDs.push_back(ISD::FMAXIMUM); break; case Intrinsic::copysign: ISDs.push_back(ISD::FCOPYSIGN); @@ -1284,12 +1344,13 @@ public: LT.second.isVector() ? 
LT.second.getVectorNumElements() : 1; while (NumVecElts > MVTLen) { NumVecElts /= 2; + Type *SubTy = VectorType::get(ScalarTy, NumVecElts); // Assume the pairwise shuffles add a cost. ShuffleCost += (IsPairwise + 1) * ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, - NumVecElts, Ty); + NumVecElts, SubTy); ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, Ty); - Ty = VectorType::get(ScalarTy, NumVecElts); + Ty = SubTy; ++LongVectorCount; } // The minimal length of the vector is limited by the real length of vector @@ -1297,8 +1358,8 @@ public: // reduction operations are performed on the vectors with the same // architecture-dependent length. ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) * - ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, - NumVecElts, Ty); + ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, + 0, Ty); ArithCost += (NumReduxLevels - LongVectorCount) * ConcreteTTI->getArithmeticInstrCost(Opcode, Ty); return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); @@ -1331,15 +1392,16 @@ public: LT.second.isVector() ? LT.second.getVectorNumElements() : 1; while (NumVecElts > MVTLen) { NumVecElts /= 2; + Type *SubTy = VectorType::get(ScalarTy, NumVecElts); // Assume the pairwise shuffles add a cost. ShuffleCost += (IsPairwise + 1) * ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, - NumVecElts, Ty); + NumVecElts, SubTy); MinMaxCost += ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, nullptr); - Ty = VectorType::get(ScalarTy, NumVecElts); + Ty = SubTy; CondTy = VectorType::get(ScalarCondTy, NumVecElts); ++LongVectorCount; } @@ -1348,8 +1410,8 @@ public: // reduction opertions are perfomed on the vectors with the same // architecture-dependent length. 
ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) * - ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, - NumVecElts, Ty); + ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, + 0, Ty); MinMaxCost += (NumReduxLevels - LongVectorCount) * (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + diff --git a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 873587651efda04580dc633c446a6fec78edb7a9..e1132ac59c829af869ec17919fcdc8940838e7ce 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -14,12 +14,14 @@ #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "legalizer" +using namespace llvm::MIPatternMatch; namespace llvm { class LegalizationArtifactCombiner { @@ -36,15 +38,17 @@ public: SmallVectorImpl &DeadInsts) { if (MI.getOpcode() != TargetOpcode::G_ANYEXT) return false; - if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC, - MI.getOperand(1).getReg(), MRI)) { + + Builder.setInstr(MI); + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); + + // aext(trunc x) - > aext/copy/trunc x + unsigned TruncSrc; + if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { LLVM_DEBUG(dbgs() << ".. 
Combine MI: " << MI;); - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = DefMI->getOperand(1).getReg(); - Builder.setInstr(MI); - // We get a copy/trunc/extend depending on the sizes - Builder.buildAnyExtOrTrunc(DstReg, SrcReg); - markInstAndDefDead(MI, *DefMI, DeadInsts); + Builder.buildAnyExtOrTrunc(DstReg, TruncSrc); + markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } return tryFoldImplicitDef(MI, DeadInsts); @@ -55,24 +59,25 @@ public: if (MI.getOpcode() != TargetOpcode::G_ZEXT) return false; - if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC, - MI.getOperand(1).getReg(), MRI)) { - unsigned DstReg = MI.getOperand(0).getReg(); + + Builder.setInstr(MI); + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); + + // zext(trunc x) - > and (aext/copy/trunc x), mask + unsigned TruncSrc; + if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { LLT DstTy = MRI.getType(DstReg); if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) || isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}})) return false; LLVM_DEBUG(dbgs() << ".. 
Combine MI: " << MI;); - Builder.setInstr(MI); - unsigned ZExtSrc = MI.getOperand(1).getReg(); - LLT ZExtSrcTy = MRI.getType(ZExtSrc); - APInt Mask = APInt::getAllOnesValue(ZExtSrcTy.getSizeInBits()); - auto MaskCstMIB = Builder.buildConstant(DstTy, Mask.getZExtValue()); - unsigned TruncSrc = DefMI->getOperand(1).getReg(); - // We get a copy/trunc/extend depending on the sizes - auto SrcCopyOrTrunc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc); - Builder.buildAnd(DstReg, SrcCopyOrTrunc, MaskCstMIB); - markInstAndDefDead(MI, *DefMI, DeadInsts); + LLT SrcTy = MRI.getType(SrcReg); + APInt Mask = APInt::getAllOnesValue(SrcTy.getSizeInBits()); + auto MIBMask = Builder.buildConstant(DstTy, Mask.getZExtValue()); + Builder.buildAnd(DstReg, Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), + MIBMask); + markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } return tryFoldImplicitDef(MI, DeadInsts); @@ -83,33 +88,34 @@ public: if (MI.getOpcode() != TargetOpcode::G_SEXT) return false; - if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC, - MI.getOperand(1).getReg(), MRI)) { - unsigned DstReg = MI.getOperand(0).getReg(); + + Builder.setInstr(MI); + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); + + // sext(trunc x) - > ashr (shl (aext/copy/trunc x), c), c + unsigned TruncSrc; + if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { LLT DstTy = MRI.getType(DstReg); if (isInstUnsupported({TargetOpcode::G_SHL, {DstTy}}) || isInstUnsupported({TargetOpcode::G_ASHR, {DstTy}}) || isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}})) return false; LLVM_DEBUG(dbgs() << ".. 
Combine MI: " << MI;); - Builder.setInstr(MI); - unsigned SExtSrc = MI.getOperand(1).getReg(); - LLT SExtSrcTy = MRI.getType(SExtSrc); - unsigned SizeDiff = DstTy.getSizeInBits() - SExtSrcTy.getSizeInBits(); - auto SizeDiffMIB = Builder.buildConstant(DstTy, SizeDiff); - unsigned TruncSrcReg = DefMI->getOperand(1).getReg(); - // We get a copy/trunc/extend depending on the sizes - auto SrcCopyExtOrTrunc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrcReg); - auto ShlMIB = Builder.buildInstr(TargetOpcode::G_SHL, DstTy, - SrcCopyExtOrTrunc, SizeDiffMIB); - Builder.buildInstr(TargetOpcode::G_ASHR, DstReg, ShlMIB, SizeDiffMIB); - markInstAndDefDead(MI, *DefMI, DeadInsts); + LLT SrcTy = MRI.getType(SrcReg); + unsigned ShAmt = DstTy.getSizeInBits() - SrcTy.getSizeInBits(); + auto MIBShAmt = Builder.buildConstant(DstTy, ShAmt); + auto MIBShl = Builder.buildInstr( + TargetOpcode::G_SHL, DstTy, + Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), MIBShAmt); + Builder.buildInstr(TargetOpcode::G_ASHR, DstReg, MIBShl, MIBShAmt); + markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } return tryFoldImplicitDef(MI, DeadInsts); } - /// Try to fold sb = EXTEND (G_IMPLICIT_DEF sa) -> sb = G_IMPLICIT_DEF + /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF). bool tryFoldImplicitDef(MachineInstr &MI, SmallVectorImpl &DeadInsts) { unsigned Opcode = MI.getOpcode(); @@ -119,13 +125,25 @@ public: if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(), MRI)) { + Builder.setInstr(MI); unsigned DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); - if (isInstUnsupported({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) - return false; - LLVM_DEBUG(dbgs() << ".. 
Combine EXT(IMPLICIT_DEF) " << MI;); - Builder.setInstr(MI); - Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, DstReg); + + if (Opcode == TargetOpcode::G_ANYEXT) { + // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF + if (isInstUnsupported({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) + return false; + LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;); + Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, DstReg); + } else { + // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top + // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT. + if (isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}})) + return false; + LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;); + Builder.buildConstant(DstReg, 0); + } + markInstAndDefDead(MI, *DefMI, DeadInsts); return true; } @@ -277,6 +295,19 @@ private: auto Step = LI.getAction(Query); return Step.Action == Unsupported || Step.Action == NotFound; } + + /// Looks through copy instructions and returns the actual + /// source register. 
+ unsigned lookThroughCopyInstrs(unsigned Reg) { + unsigned TmpReg; + while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) { + if (MRI.getType(TmpReg).isValid()) + Reg = TmpReg; + else + break; + } + return Reg; + } }; } // namespace llvm diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index a8c26082f2210f496f8f6b32ecadf56214c21604..755805de1b08e35bc4121cabb713053cfee44ab7 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -121,7 +121,7 @@ struct LegalityQuery { ArrayRef Types; struct MemDesc { - uint64_t Size; + uint64_t SizeInBits; AtomicOrdering Ordering; }; @@ -693,6 +693,8 @@ public: }, [=](const LegalityQuery &Query) { LLT VecTy = Query.Types[TypeIdx]; + if (MaxElements == 1) + return std::make_pair(TypeIdx, VecTy.getElementType()); return std::make_pair( TypeIdx, LLT::vector(MaxElements, VecTy.getScalarSizeInBits())); }); diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index ec9c46140d70556f3df5facb3552cc049ac065ff..ac620e4b69c501353ebf0986ede0751aa775d135 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -70,7 +70,7 @@ namespace ISD { /// of the frame or return address to return. An index of zero corresponds /// to the current function's frame or return address, an index of one to /// the parent's frame or return address, and so on. - FRAMEADDR, RETURNADDR, ADDROFRETURNADDR, + FRAMEADDR, RETURNADDR, ADDROFRETURNADDR, SPONENTRY, /// LOCAL_RECOVER - Represents the llvm.localrecover intrinsic. /// Materializes the offset from the local object pointer of another @@ -256,6 +256,22 @@ namespace ISD { /// Same for multiplication. SMULO, UMULO, + /// RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 + /// integers with the same bit width (W). 
If the true value of LHS + RHS + /// exceeds the largest value that can be represented by W bits, the + /// resulting value is this maximum value. Otherwise, if this value is less + /// than the smallest value that can be represented by W bits, the + /// resulting value is this minimum value. + SADDSAT, UADDSAT, + + /// RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 + /// integers with the same bit width (W). If the true value of LHS - RHS + /// exceeds the largest value that can be represented by W bits, the + /// resulting value is this maximum value. Otherwise, if this value is less + /// than the smallest value that can be represented by W bits, the + /// resulting value is this minimum value. + SSUBSAT, USUBSAT, + /// Simple binary floating point operators. FADD, FSUB, FMUL, FDIV, FREM, @@ -272,7 +288,8 @@ namespace ISD { /// They are used to limit optimizations while the DAG is being optimized. STRICT_FSQRT, STRICT_FPOW, STRICT_FPOWI, STRICT_FSIN, STRICT_FCOS, STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2, - STRICT_FRINT, STRICT_FNEARBYINT, + STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM, + STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC, /// FMA - Perform a * b + c with no intermediate rounding step. FMA, @@ -556,13 +573,23 @@ namespace ISD { FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR, /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two /// values. - /// In the case where a single input is NaN, the non-NaN input is returned. + /// + /// In the case where a single input is a NaN (either signaling or quiet), + /// the non-NaN input is returned. /// /// The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0. FMINNUM, FMAXNUM, - /// FMINNAN/FMAXNAN - Behave identically to FMINNUM/FMAXNUM, except that - /// when a single input is NaN, NaN is returned. 
- FMINNAN, FMAXNAN, + + /// FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on + /// two values, following the IEEE-754 2008 definition. This differs from + /// FMINNUM/FMAXNUM in the handling of signaling NaNs. If one input is a + /// signaling NaN, returns a quiet NaN. + FMINNUM_IEEE, FMAXNUM_IEEE, + + /// FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 + /// as less than 0.0. While FMINNUM_IEEE/FMAXNUM_IEEE follow IEEE 754-2008 + /// semantics, FMINIMUM/FMAXIMUM follow IEEE 754-2018 draft semantics. + FMINIMUM, FMAXIMUM, /// FSINCOS - Compute both fsin and fcos as a single operation. FSINCOS, diff --git a/include/llvm/CodeGen/LiveIntervals.h b/include/llvm/CodeGen/LiveIntervals.h index 291a07a712cb76459d88e2899cbebbb17035247d..16ab1dc475c41bf2ff5901a92c81de829233dd99 100644 --- a/include/llvm/CodeGen/LiveIntervals.h +++ b/include/llvm/CodeGen/LiveIntervals.h @@ -198,10 +198,10 @@ class VirtRegMap; void pruneValue(LiveRange &LR, SlotIndex Kill, SmallVectorImpl *EndPoints); - /// This function should not be used. Its intend is to tell you that - /// you are doing something wrong if you call pruveValue directly on a + /// This function should not be used. Its intent is to tell you that you are + /// doing something wrong if you call pruneValue directly on a /// LiveInterval. Indeed, you are supposed to call pruneValue on the main - /// LiveRange and all the LiveRange of the subranges if any. + /// LiveRange and all the LiveRanges of the subranges if any. LLVM_ATTRIBUTE_UNUSED void pruneValue(LiveInterval &, SlotIndex, SmallVectorImpl *) { llvm_unreachable( diff --git a/include/llvm/CodeGen/LivePhysRegs.h b/include/llvm/CodeGen/LivePhysRegs.h index 301a45066b4c63a62dd374c7ea1d18f193d937d2..7312902e21b71f0c3873c9a7d0a3f596d14c41ba 100644 --- a/include/llvm/CodeGen/LivePhysRegs.h +++ b/include/llvm/CodeGen/LivePhysRegs.h @@ -48,7 +48,8 @@ class raw_ostream; /// when walking backward/forward through a basic block. 
class LivePhysRegs { const TargetRegisterInfo *TRI = nullptr; - SparseSet LiveRegs; + using RegisterSet = SparseSet>; + RegisterSet LiveRegs; public: /// Constructs an unitialized set. init() needs to be called to initialize it. @@ -76,7 +77,7 @@ public: bool empty() const { return LiveRegs.empty(); } /// Adds a physical register and all its sub-registers to the set. - void addReg(unsigned Reg) { + void addReg(MCPhysReg Reg) { assert(TRI && "LivePhysRegs is not initialized."); assert(Reg <= TRI->getNumRegs() && "Expected a physical register."); for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); @@ -86,7 +87,7 @@ public: /// Removes a physical register, all its sub-registers, and all its /// super-registers from the set. - void removeReg(unsigned Reg) { + void removeReg(MCPhysReg Reg) { assert(TRI && "LivePhysRegs is not initialized."); assert(Reg <= TRI->getNumRegs() && "Expected a physical register."); for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) @@ -95,7 +96,7 @@ public: /// Removes physical registers clobbered by the regmask operand \p MO. void removeRegsInMask(const MachineOperand &MO, - SmallVectorImpl> *Clobbers = + SmallVectorImpl> *Clobbers = nullptr); /// Returns true if register \p Reg is contained in the set. This also @@ -103,10 +104,10 @@ public: /// addReg() always adds all sub-registers to the set as well. /// Note: Returns false if just some sub registers are live, use available() /// when searching a free register. - bool contains(unsigned Reg) const { return LiveRegs.count(Reg); } + bool contains(MCPhysReg Reg) const { return LiveRegs.count(Reg); } /// Returns true if register \p Reg and no aliasing register is in the set. - bool available(const MachineRegisterInfo &MRI, unsigned Reg) const; + bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const; /// Remove defined registers and regmask kills from the set. 
void removeDefs(const MachineInstr &MI); @@ -126,7 +127,7 @@ public: /// defined or clobbered by a regmask. The operand will identify whether this /// is a regmask or register operand. void stepForward(const MachineInstr &MI, - SmallVectorImpl> &Clobbers); + SmallVectorImpl> &Clobbers); /// Adds all live-in registers of basic block \p MBB. /// Live in registers are the registers in the blocks live-in list and the @@ -143,7 +144,7 @@ public: /// registers. void addLiveOutsNoPristines(const MachineBasicBlock &MBB); - using const_iterator = SparseSet::const_iterator; + using const_iterator = RegisterSet::const_iterator; const_iterator begin() const { return LiveRegs.begin(); } const_iterator end() const { return LiveRegs.end(); } diff --git a/include/llvm/CodeGen/LiveRegUnits.h b/include/llvm/CodeGen/LiveRegUnits.h index 249545906e01d10c11215aa355783b9cd6956aac..5e9dd8b3cdf6983387469fd9975eb171bbfdca85 100644 --- a/include/llvm/CodeGen/LiveRegUnits.h +++ b/include/llvm/CodeGen/LiveRegUnits.h @@ -85,14 +85,14 @@ public: bool empty() const { return Units.none(); } /// Adds register units covered by physical register \p Reg. - void addReg(unsigned Reg) { + void addReg(MCPhysReg Reg) { for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) Units.set(*Unit); } /// Adds register units covered by physical register \p Reg that are /// part of the lanemask \p Mask. - void addRegMasked(unsigned Reg, LaneBitmask Mask) { + void addRegMasked(MCPhysReg Reg, LaneBitmask Mask) { for (MCRegUnitMaskIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) { LaneBitmask UnitMask = (*Unit).second; if (UnitMask.none() || (UnitMask & Mask).any()) @@ -101,7 +101,7 @@ public: } /// Removes all register units covered by physical register \p Reg. 
- void removeReg(unsigned Reg) { + void removeReg(MCPhysReg Reg) { for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) Units.reset(*Unit); } @@ -115,7 +115,7 @@ public: void addRegsInMask(const uint32_t *RegMask); /// Returns true if no part of physical register \p Reg is live. - bool available(unsigned Reg) const { + bool available(MCPhysReg Reg) const { for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) { if (Units.test(*Unit)) return false; diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h index dc9057521e7e48935553786d68b35f9043c2f919..98ac81915dc05c25d153d2b211b9eb798acae5c5 100644 --- a/include/llvm/CodeGen/MIRYamlMapping.h +++ b/include/llvm/CodeGen/MIRYamlMapping.h @@ -494,6 +494,7 @@ struct MachineFunction { bool FailedISel = false; // Register information bool TracksRegLiveness = false; + bool HasWinCFI = false; std::vector VirtualRegisters; std::vector LiveIns; Optional> CalleeSavedRegisters; @@ -517,6 +518,7 @@ template <> struct MappingTraits { YamlIO.mapOptional("selected", MF.Selected, false); YamlIO.mapOptional("failedISel", MF.FailedISel, false); YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness, false); + YamlIO.mapOptional("hasWinCFI", MF.HasWinCFI, false); YamlIO.mapOptional("registers", MF.VirtualRegisters, std::vector()); YamlIO.mapOptional("liveins", MF.LiveIns, diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index 7471b31484644e47c76b316f2826b5563070dbe2..35305bd53b28908fe0fcf45396822838c1043c5c 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -58,6 +58,7 @@ class DILocalVariable; class DILocation; class Function; class GlobalValue; +class LLVMTargetMachine; class MachineConstantPool; class MachineFrameInfo; class MachineFunction; @@ -70,7 +71,6 @@ class Pass; class PseudoSourceValueManager; class raw_ostream; class SlotIndexes; -class TargetMachine; class 
TargetRegisterClass; class TargetSubtargetInfo; struct WasmEHFuncInfo; @@ -225,7 +225,7 @@ struct LandingPadInfo { class MachineFunction { const Function &F; - const TargetMachine &Target; + const LLVMTargetMachine &Target; const TargetSubtargetInfo *STI; MCContext &Ctx; MachineModuleInfo &MMI; @@ -316,6 +316,9 @@ class MachineFunction { /// Map a landing pad's EH symbol to the call site indexes. DenseMap> LPadToCallSiteMap; + /// Map a landing pad to its index. + DenseMap WasmLPadToIndexMap; + /// Map of invoke call site index values to associated begin EH_LABEL. DenseMap CallSiteMap; @@ -385,7 +388,7 @@ public: using VariableDbgInfoMapTy = SmallVector; VariableDbgInfoMapTy VariableDbgInfos; - MachineFunction(const Function &F, const TargetMachine &Target, + MachineFunction(const Function &F, const LLVMTargetMachine &Target, const TargetSubtargetInfo &STI, unsigned FunctionNum, MachineModuleInfo &MMI); MachineFunction(const MachineFunction &) = delete; @@ -433,7 +436,7 @@ public: unsigned getFunctionNumber() const { return FunctionNumber; } /// getTarget - Return the target machine this machine code is compiled with - const TargetMachine &getTarget() const { return Target; } + const LLVMTargetMachine &getTarget() const { return Target; } /// getSubtarget - Return the subtarget for which this machine code is being /// compiled. @@ -810,7 +813,8 @@ public: LandingPadInfo &getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad); /// Remap landing pad labels and remove any deleted landing pads. - void tidyLandingPads(DenseMap *LPMap = nullptr); + void tidyLandingPads(DenseMap *LPMap = nullptr, + bool TidyIfNoBeginLabels = true); /// Return a reference to the landing pad info for the current function. const std::vector &getLandingPads() const { @@ -853,6 +857,22 @@ public: /// Map the landing pad's EH symbol to the call site indexes. void setCallSiteLandingPad(MCSymbol *Sym, ArrayRef Sites); + /// Map the landing pad to its index. Used for Wasm exception handling. 
+ void setWasmLandingPadIndex(const MachineBasicBlock *LPad, unsigned Index) { + WasmLPadToIndexMap[LPad] = Index; + } + + /// Returns true if the landing pad has an associated index in wasm EH. + bool hasWasmLandingPadIndex(const MachineBasicBlock *LPad) const { + return WasmLPadToIndexMap.count(LPad); + } + + /// Get the index in wasm EH for a given landing pad. + unsigned getWasmLandingPadIndex(const MachineBasicBlock *LPad) const { + assert(hasWasmLandingPadIndex(LPad)); + return WasmLPadToIndexMap.lookup(LPad); + } + /// Get the call site indexes for a landing pad EH symbol. SmallVectorImpl &getCallSiteLandingPad(MCSymbol *Sym) { assert(hasCallSiteLandingPad(Sym) && diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index 7c4e771ce7215aad394bfcd59832056f464259f4..ea1a2a536fc7377e0e33eadbee60acbbdb3be6a6 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -408,7 +408,7 @@ public: /// Returns the opcode of this MachineInstr. unsigned getOpcode() const { return MCID->Opcode; } - /// Access to explicit operands of the instruction. + /// Returns the total number of operands. unsigned getNumOperands() const { return NumOperands; } const MachineOperand& getOperand(unsigned i) const { diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index 554e89019b76fd8b345e9340ff99f992dfd29b5b..4371420bc7a2bb83f935a0ddb50d47b99c9d1bdd 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -46,10 +46,10 @@ namespace llvm { class BasicBlock; class CallInst; class Function; -class MachineFunction; +class LLVMTargetMachine; class MMIAddrLabelMap; +class MachineFunction; class Module; -class TargetMachine; //===----------------------------------------------------------------------===// /// This class can be derived from and used by targets to hold private @@ -76,7 +76,7 @@ protected: /// for specific use. 
/// class MachineModuleInfo : public ImmutablePass { - const TargetMachine &TM; + const LLVMTargetMachine &TM; /// This is the MCContext used for the entire code generator. MCContext Context; @@ -145,7 +145,7 @@ class MachineModuleInfo : public ImmutablePass { public: static char ID; // Pass identification, replacement for typeid - explicit MachineModuleInfo(const TargetMachine *TM = nullptr); + explicit MachineModuleInfo(const LLVMTargetMachine *TM = nullptr); ~MachineModuleInfo() override; // Initialization and Finalization diff --git a/include/llvm/CodeGen/MachineOutliner.h b/include/llvm/CodeGen/MachineOutliner.h index 95bfc24b57ffad600775ca95bf9c4238f1f3fd61..eaa741353abb0c4cb27298ac95bfe5bdeedd8dc5 100644 --- a/include/llvm/CodeGen/MachineOutliner.h +++ b/include/llvm/CodeGen/MachineOutliner.h @@ -169,9 +169,6 @@ public: /// This is initialized after we go through and create the actual function. MachineFunction *MF = nullptr; - /// A number assigned to this function which appears at the end of its name. - unsigned Name; - /// The sequence of integers corresponding to the instructions in this /// function. std::vector Sequence; diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index a6836a53f048dc344a0f107e5a14492eb6af213c..fef010a23ef9d3b3e9477f4d73fa1d79c64c2392 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -689,15 +689,14 @@ public: unsigned MinNumRegs = 0); /// Constrain the register class or the register bank of the virtual register - /// \p Reg to be a common subclass and a common bank of both registers - /// provided respectively. Do nothing if any of the attributes (classes, - /// banks, or low-level types) of the registers are deemed incompatible, or if - /// the resulting register will have a class smaller than before and of size - /// less than \p MinNumRegs. Return true if such register attributes exist, - /// false otherwise. 
+ /// \p Reg (and low-level type) to be a common subclass or a common bank of + /// both registers provided respectively (and a common low-level type). Do + /// nothing if any of the attributes (classes, banks, or low-level types) of + /// the registers are deemed incompatible, or if the resulting register will + /// have a class smaller than before and of size less than \p MinNumRegs. + /// Return true if such register attributes exist, false otherwise. /// - /// \note Assumes that each register has either a low-level type or a class - /// assigned, but not both. Use this method instead of constrainRegClass and + /// \note Use this method instead of constrainRegClass and /// RegisterBankInfo::constrainGenericRegister everywhere but SelectionDAG /// ISel / FastISel and GlobalISel's InstructionSelect pass respectively. bool constrainRegAttrs(unsigned Reg, unsigned ConstrainingReg, diff --git a/include/llvm/CodeGen/RegisterUsageInfo.h b/include/llvm/CodeGen/RegisterUsageInfo.h index efd175eeed3041b223b1a3b4670caad121281e14..efecc61d9c3039dbb7ef4c12278c78b565f64dce 100644 --- a/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/include/llvm/CodeGen/RegisterUsageInfo.h @@ -29,7 +29,7 @@ namespace llvm { class Function; -class TargetMachine; +class LLVMTargetMachine; class PhysicalRegisterUsageInfo : public ImmutablePass { public: @@ -41,7 +41,7 @@ public: } /// Set TargetMachine which is used to print analysis. - void setTargetMachine(const TargetMachine &TM); + void setTargetMachine(const LLVMTargetMachine &TM); bool doInitialization(Module &M) override; @@ -63,7 +63,7 @@ private: /// and 1 means content of register will be preserved around function call. 
DenseMap> RegMasks; - const TargetMachine *TM; + const LLVMTargetMachine *TM; }; } // end namespace llvm diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index f2b072768b2597e5694e348ed919fbc4f9fc15e1..0870d67db390c4dfee649df311d8e633ce9e49bc 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -33,15 +33,15 @@ namespace llvm { template class GraphWriter; +class LLVMTargetMachine; class MachineFunction; class MachineRegisterInfo; class MCInstrDesc; struct MCSchedClassDesc; -class ScheduleDAG; class SDNode; class SUnit; +class ScheduleDAG; class TargetInstrInfo; -class TargetMachine; class TargetRegisterClass; class TargetRegisterInfo; @@ -558,7 +558,7 @@ class TargetRegisterInfo; class ScheduleDAG { public: - const TargetMachine &TM; ///< Target processor + const LLVMTargetMachine &TM; ///< Target processor const TargetInstrInfo *TII; ///< Target instruction information const TargetRegisterInfo *TRI; ///< Target processor register info MachineFunction &MF; ///< Machine function diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 973a3ddb1bace2bf6766e300db662ce7c9748cd3..3b144b92e2a6eb93499429e32741c2f93ec2d911 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -786,24 +786,6 @@ public: /// value assuming it was the smaller SrcTy value. SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT); - /// Return an operation which will any-extend the low lanes of the operand - /// into the specified vector type. For example, - /// this can convert a v16i8 into a v4i32 by any-extending the low four - /// lanes of the operand from i8 to i32. - SDValue getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT); - - /// Return an operation which will sign extend the low lanes of the operand - /// into the specified vector type. 
For example, - /// this can convert a v16i8 into a v4i32 by sign extending the low four - /// lanes of the operand from i8 to i32. - SDValue getSignExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT); - - /// Return an operation which will zero extend the low lanes of the operand - /// into the specified vector type. For example, - /// this can convert a v16i8 into a v4i32 by zero extending the low four - /// lanes of the operand from i8 to i32. - SDValue getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT); - /// Convert Op, which must be of integer type, to the integer type VT, /// by using an extension appropriate for the target's /// BooleanContent for type OpVT or truncating it. diff --git a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h index 580606441a9d0601d0ffad7efd32e456c070dded..2b2c48d57bc0fac3a04f9dbf09491194ab843c68 100644 --- a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h +++ b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h @@ -45,18 +45,21 @@ public: IsIndexSignExt(IsIndexSignExt) {} SDValue getBase() { return Base; } + SDValue getBase() const { return Base; } SDValue getIndex() { return Index; } + SDValue getIndex() const { return Index; } - bool equalBaseIndex(BaseIndexOffset &Other, const SelectionDAG &DAG) { + bool equalBaseIndex(const BaseIndexOffset &Other, + const SelectionDAG &DAG) const { int64_t Off; return equalBaseIndex(Other, DAG, Off); } - bool equalBaseIndex(BaseIndexOffset &Other, const SelectionDAG &DAG, - int64_t &Off); + bool equalBaseIndex(const BaseIndexOffset &Other, const SelectionDAG &DAG, + int64_t &Off) const; /// Parses tree in Ptr for base, index, offset addresses. 
- static BaseIndexOffset match(LSBaseSDNode *N, const SelectionDAG &DAG); + static BaseIndexOffset match(const LSBaseSDNode *N, const SelectionDAG &DAG); }; } // end namespace llvm diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 28d27b7a459d685fef095d3afda7adba654daced..d125e888a5742d145f10ea43ec8046e917dcc078 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -672,6 +672,12 @@ public: case ISD::STRICT_FLOG2: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: + case ISD::STRICT_FMAXNUM: + case ISD::STRICT_FMINNUM: + case ISD::STRICT_FCEIL: + case ISD::STRICT_FFLOOR: + case ISD::STRICT_FROUND: + case ISD::STRICT_FTRUNC: return true; } } diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index 55082222b7aea7d7fb048a80f36e5bf6c0b41b51..8c8a7be459fd5a605c23b3b16b30fda12e22ff06 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -413,10 +413,14 @@ class raw_ostream; /// Returns the base index for the given instruction. SlotIndex getInstructionIndex(const MachineInstr &MI) const { // Instructions inside a bundle have the same number as the bundle itself. - const MachineInstr &BundleStart = *getBundleStart(MI.getIterator()); - assert(!BundleStart.isDebugInstr() && + auto BundleStart = getBundleStart(MI.getIterator()); + auto BundleEnd = getBundleEnd(MI.getIterator()); + // Use the first non-debug instruction in the bundle to get SlotIndex. 
+ const MachineInstr &BundleNonDebug = + *skipDebugInstructionsForward(BundleStart, BundleEnd); + assert(!BundleNonDebug.isDebugInstr() && "Could not use a debug instruction to query mi2iMap."); - Mi2IndexMap::const_iterator itr = mi2iMap.find(&BundleStart); + Mi2IndexMap::const_iterator itr = mi2iMap.find(&BundleNonDebug); assert(itr != mi2iMap.end() && "Instruction not found in maps."); return itr->second; } @@ -444,7 +448,7 @@ class raw_ostream; /// MI is not required to have an index. SlotIndex getIndexBefore(const MachineInstr &MI) const { const MachineBasicBlock *MBB = MI.getParent(); - assert(MBB && "MI must be inserted inna basic block"); + assert(MBB && "MI must be inserted in a basic block"); MachineBasicBlock::const_iterator I = MI, B = MBB->begin(); while (true) { if (I == B) @@ -461,7 +465,7 @@ class raw_ostream; /// MI is not required to have an index. SlotIndex getIndexAfter(const MachineInstr &MI) const { const MachineBasicBlock *MBB = MI.getParent(); - assert(MBB && "MI must be inserted inna basic block"); + assert(MBB && "MI must be inserted in a basic block"); MachineBasicBlock::const_iterator I = MI, E = MBB->end(); while (true) { ++I; diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h index a5939070476e3e4f24c858e6e2d67436dbdf5145..38e575b1360fb1e6d38eec123d348ef058d8468b 100644 --- a/include/llvm/CodeGen/TargetLowering.h +++ b/include/llvm/CodeGen/TargetLowering.h @@ -279,7 +279,7 @@ public: /// Return the preferred vector type legalization action. 
virtual TargetLoweringBase::LegalizeTypeAction - getPreferredVectorAction(EVT VT) const { + getPreferredVectorAction(MVT VT) const { // The default action for one element vectors is to scalarize if (VT.getVectorNumElements() == 1) return TypeScalarizeVector; @@ -819,6 +819,12 @@ public: case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; + case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break; + case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break; + case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break; + case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break; + case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break; + case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break; } auto Action = getOperationAction(EqOpc, VT); @@ -2058,6 +2064,14 @@ public: return true; } + /// Return true if the specified immediate is legal for the value input of a + /// store instruction. + virtual bool isLegalStoreImmediate(int64_t Value) const { + // Default implementation assumes that at least 0 works since it is likely + // that a zero register exists or a zero immediate is allowed. + return Value == 0; + } + /// Return true if it's significantly cheaper to shift a vector by a uniform /// scalar than by an amount which will vary across each lane. On x86, for /// example, there is a "psllw" instruction for the former case, but no simple @@ -2091,8 +2105,8 @@ public: case ISD::ADDE: case ISD::FMINNUM: case ISD::FMAXNUM: - case ISD::FMINNAN: - case ISD::FMAXNAN: + case ISD::FMINIMUM: + case ISD::FMAXIMUM: return true; default: return false; } @@ -2908,11 +2922,22 @@ public: /// elements, returning true on success. Otherwise, analyze the expression and /// return a mask of KnownUndef and KnownZero elements for the expression /// (used to simplify the caller). 
The KnownUndef/Zero elements may only be - /// accurate for those bits in the DemandedMask + /// accurate for those bits in the DemandedMask. virtual bool SimplifyDemandedVectorEltsForTargetNode( SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const; + /// Attempt to simplify any target nodes based on the demanded bits, + /// returning true on success. Otherwise, analyze the + /// expression and return a mask of KnownOne and KnownZero bits for the + /// expression (used to simplify the caller). The KnownZero/One bits may only + /// be accurate for those bits in the DemandedMask. + virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, + const APInt &DemandedBits, + KnownBits &Known, + TargetLoweringOpt &TLO, + unsigned Depth = 0) const; + /// If \p SNaN is false, \returns true if \p Op is known to never be any /// NaN. If \p sNaN is true, returns if \p Op is known to never be a signaling /// NaN. @@ -3644,6 +3669,42 @@ public: /// \returns True, if the expansion was successful, false otherwise bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + /// Expand float to UINT conversion + /// \param N Node to expand + /// \param Result output after conversion + /// \returns True, if the expansion was successful, false otherwise + bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + + /// Expand UINT(i64) to double(f64) conversion + /// \param N Node to expand + /// \param Result output after conversion + /// \returns True, if the expansion was successful, false otherwise + bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + + /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs. + SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const; + + /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes, + /// vector nodes can only succeed if all operations are legal/custom. 
+ /// \param N Node to expand + /// \param Result output after conversion + /// \returns True, if the expansion was successful, false otherwise + bool expandCTPOP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + + /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes, + /// vector nodes can only succeed if all operations are legal/custom. + /// \param N Node to expand + /// \param Result output after conversion + /// \returns True, if the expansion was successful, false otherwise + bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + + /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes, + /// vector nodes can only succeed if all operations are legal/custom. + /// \param N Node to expand + /// \param Result output after conversion + /// \returns True, if the expansion was successful, false otherwise + bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + /// Turn load of vector type into a load of the individual elements. /// \param LD load to expand /// \returns MERGE_VALUEs of the scalar loads with their chains. @@ -3681,6 +3742,11 @@ public: SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const; + /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This + /// method accepts integers or vectors of integers as its arguments. 
+ SDValue getExpandedSaturationAdditionSubtraction(SDNode *Node, + SelectionDAG &DAG) const; + //===--------------------------------------------------------------------===// // Instruction Emitting Hooks // diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h index 8f5c9cb8c3fa8a0c1f4850b36752bc5aa95972db..7fda8751d40ae9a296b0ca177c37f29f5ac854db 100644 --- a/include/llvm/CodeGen/TargetPassConfig.h +++ b/include/llvm/CodeGen/TargetPassConfig.h @@ -145,13 +145,13 @@ public: CodeGenOpt::Level getOptLevel() const; - /// Describe the status of the codegen - /// pipeline set by this target pass config. - /// Having a limited codegen pipeline means that options - /// have been used to restrict what codegen is doing. - /// In particular, that means that codegen won't emit - /// assembly code. - bool hasLimitedCodeGenPipeline() const; + /// Returns true if one of the `-start-after`, `-start-before`, `-stop-after` + /// or `-stop-before` options is set. + static bool hasLimitedCodeGenPipeline(); + + /// Returns true if none of the `-stop-before` and `-stop-after` options is + /// set. + static bool willCompleteCodeGenPipeline(); /// If hasLimitedCodeGenPipeline is true, this method /// returns a string with the name of the options, separated @@ -159,13 +159,6 @@ public: std::string getLimitedCodeGenPipelineReason(const char *Separator = "/") const; - /// Check if the codegen pipeline is limited in such a way that it - /// won't be complete. When the codegen pipeline is not complete, - /// this means it may not be possible to generate assembly from it. 
- bool willCompleteCodeGenPipeline() const { - return !hasLimitedCodeGenPipeline() || (!StopAfter && !StopBefore); - } - void setDisableVerify(bool Disable) { setOpt(DisableVerify, Disable); } bool getEnableTailMerge() const { return EnableTailMerge; } diff --git a/include/llvm/CodeGen/TargetSubtargetInfo.h b/include/llvm/CodeGen/TargetSubtargetInfo.h index e28673de2253081cadf66290799f0cc9143aa3b0..968e4c4b810273ae3873459158d7f611358f9079 100644 --- a/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -169,6 +169,19 @@ public: return isZeroIdiom(MI, Mask); } + /// Returns true if MI is a candidate for move elimination. + /// + /// A candidate for move elimination may be optimized out at register renaming + /// stage. Subtargets can specify the set of optimizable moves by + /// instantiating tablegen class `IsOptimizableRegisterMove` (see + /// llvm/Target/TargetInstrPredicate.td). + /// + /// SubtargetEmitter is responsible for processing all the definitions of class + /// IsOptimizableRegisterMove, and auto-generate an override for this method. + virtual bool isOptimizableRegisterMove(const MachineInstr *MI) const { + return false; + } + /// True if the subtarget should run MachineScheduler after aggressive /// coalescing. 
/// diff --git a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h index b5479db97a1508dbe8fb199df6bf4c78d9ab7de7..6b5dd2d20d170b60bd8a5bf9ed3b1eb6dadd187f 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h +++ b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h @@ -47,7 +47,7 @@ public: return Error::success(); } template static Expected deserializeAs(CVSymbol Symbol) { - T Record(Symbol.kind()); + T Record(static_cast(Symbol.kind())); if (auto EC = deserializeAs(Symbol, Record)) return std::move(EC); return Record; diff --git a/include/llvm/DebugInfo/CodeView/TypeIndex.h b/include/llvm/DebugInfo/CodeView/TypeIndex.h index 681b5f3aca9433182e99ff7d3cb312e7ec02a24b..58463a6b13dff3ed560644515b73f931f233d621 100644 --- a/include/llvm/DebugInfo/CodeView/TypeIndex.h +++ b/include/llvm/DebugInfo/CodeView/TypeIndex.h @@ -145,6 +145,13 @@ public: return TypeIndex(SimpleTypeKind::Void, SimpleTypeMode::NearPointer64); } + static TypeIndex NullptrT() { + // std::nullptr_t uses the pointer mode that doesn't indicate bit-width, + // presumably because std::nullptr_t is intended to be compatible with any + // pointer type. 
+ return TypeIndex(SimpleTypeKind::Void, SimpleTypeMode::NearPointer); + } + static TypeIndex SignedCharacter() { return TypeIndex(SimpleTypeKind::SignedCharacter); } diff --git a/include/llvm/DebugInfo/CodeView/TypeRecord.h b/include/llvm/DebugInfo/CodeView/TypeRecord.h index ee6f53854e7ab302e6ffb63357deb08c8d6ef0e9..76f1f98ab660ae678ed5e4a50877eed37212aa24 100644 --- a/include/llvm/DebugInfo/CodeView/TypeRecord.h +++ b/include/llvm/DebugInfo/CodeView/TypeRecord.h @@ -429,6 +429,10 @@ public: return (Options & ClassOptions::ForwardReference) != ClassOptions::None; } + bool isScoped() const { + return (Options & ClassOptions::Scoped) != ClassOptions::None; + } + uint16_t getMemberCount() const { return MemberCount; } ClassOptions getOptions() const { return Options; } TypeIndex getFieldList() const { return FieldList; } @@ -655,7 +659,17 @@ public: ArrayRef getArgs() const { return ArgIndices; } - SmallVector ArgIndices; + /// Indices of known build info arguments. + enum BuildInfoArg { + CurrentDirectory, ///< Absolute CWD path + BuildTool, ///< Absolute compiler path + SourceFile, ///< Path to main source file, relative or absolute + TypeServerPDB, ///< Absolute path of type server PDB (/Fd) + CommandLine, ///< Full canonical command line (maybe -cc1) + MaxArgs + }; + + SmallVector ArgIndices; }; // LF_VFTABLE diff --git a/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h b/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h index 583740d2eb4b78fc31a8ec474277a45b254a8854..a84f074237de0a8784903627c7b554515711cb98 100644 --- a/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h +++ b/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h @@ -83,18 +83,21 @@ Error mergeIdRecords(MergingTypeTableBuilder &Dest, ArrayRef Types, Error mergeTypeAndIdRecords(MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes, SmallVectorImpl &SourceToDest, - const CVTypeArray &IdsAndTypes); + const CVTypeArray &IdsAndTypes, + Optional &EndPrecomp); Error 
mergeTypeAndIdRecords(GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes, SmallVectorImpl &SourceToDest, const CVTypeArray &IdsAndTypes, - ArrayRef Hashes); + ArrayRef Hashes, + Optional &EndPrecomp); Error mergeTypeRecords(GlobalTypeTableBuilder &Dest, SmallVectorImpl &SourceToDest, const CVTypeArray &Types, - ArrayRef Hashes); + ArrayRef Hashes, + Optional &EndPrecomp); Error mergeIdRecords(GlobalTypeTableBuilder &Dest, ArrayRef Types, SmallVectorImpl &SourceToDest, diff --git a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h index 27d56d72f0a7ac819dc796f08680e2d5bc7ff3de..33797419a7b8fb687ab0da09902a81bbeb51dbcc 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h @@ -18,13 +18,13 @@ namespace llvm { class DWARFCompileUnit : public DWARFUnit { public: DWARFCompileUnit(DWARFContext &Context, const DWARFSection &Section, - const DWARFUnitHeader &Header, - const DWARFDebugAbbrev *DA, const DWARFSection *RS, + const DWARFUnitHeader &Header, const DWARFDebugAbbrev *DA, + const DWARFSection *RS, const DWARFSection *LocSection, StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS, const DWARFSection &LS, bool LE, bool IsDWO, const DWARFUnitVector &UnitVector) - : DWARFUnit(Context, Section, Header, DA, RS, SS, SOS, AOS, LS, LE, IsDWO, - UnitVector) {} + : DWARFUnit(Context, Section, Header, DA, RS, LocSection, SS, SOS, AOS, + LS, LE, IsDWO, UnitVector) {} /// VTable anchor. 
~DWARFCompileUnit() override; diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h index c5b98ea5a2aacb321e1f689a95519c63437641f1..221f1f796980f343eb92ce666b54c679f8b6896c 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -76,7 +76,7 @@ class DWARFContext : public DIContext { DWARFUnitVector DWOUnits; std::unique_ptr AbbrevDWO; - std::unique_ptr LocDWO; + std::unique_ptr LocDWO; /// The maximum DWARF version of all units. unsigned MaxVersion = 0; @@ -262,7 +262,7 @@ public: const DWARFDebugAbbrev *getDebugAbbrevDWO(); /// Get a pointer to the parsed DebugLoc object. - const DWARFDebugLocDWO *getDebugLocDWO(); + const DWARFDebugLoclists *getDebugLocDWO(); /// Get a pointer to the parsed DebugAranges object. const DWARFDebugAranges *getDebugAranges(); diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h index 9a73745fb6b4d82ed7f5fc0f9f7ac7bd9ce91cf7..da2098e15402bb883913216cd4894740a55986f3 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h @@ -73,19 +73,21 @@ public: uint32_t *Offset); }; -class DWARFDebugLocDWO { +class DWARFDebugLoclists { public: struct Entry { - uint64_t Start; - uint32_t Length; + uint8_t Kind; + uint64_t Value0; + uint64_t Value1; SmallVector Loc; }; struct LocationList { unsigned Offset; SmallVector Entries; - void dump(raw_ostream &OS, bool IsLittleEndian, unsigned AddressSize, - const MCRegisterInfo *RegInfo, unsigned Indent) const; + void dump(raw_ostream &OS, uint64_t BaseAddr, bool IsLittleEndian, + unsigned AddressSize, const MCRegisterInfo *RegInfo, + unsigned Indent) const; }; private: @@ -98,15 +100,15 @@ private: bool IsLittleEndian; public: - void parse(DataExtractor data); - void dump(raw_ostream &OS, const MCRegisterInfo *RegInfo, + void parse(DataExtractor data, unsigned Version); + void dump(raw_ostream &OS, 
uint64_t BaseAddr, const MCRegisterInfo *RegInfo, Optional Offset) const; /// Return the location list at the given offset or nullptr. LocationList const *getLocationListAtOffset(uint64_t Offset) const; - static Optional parseOneLocationList(DataExtractor Data, - uint32_t *Offset); + static Optional + parseOneLocationList(DataExtractor Data, unsigned *Offset, unsigned Version); }; } // end namespace llvm diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h index ce7436d9faa387f93053ed159826da0008a2ca92..bc26edf006477350734b9130f53c58aa4394f359 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h @@ -18,7 +18,6 @@ namespace llvm { -struct BaseAddress; class raw_ostream; class DWARFDebugRangeList { @@ -78,7 +77,7 @@ public: /// list. Has to be passed base address of the compile unit referencing this /// range list. DWARFAddressRangesVector - getAbsoluteRanges(llvm::Optional BaseAddr) const; + getAbsoluteRanges(llvm::Optional BaseAddr) const; }; } // end namespace llvm diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h index e2e8ab5ed2193bb50d1c43511b26cee861888524..5cc8d789e5980e1311d4cad3907fce076519586b 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h @@ -10,6 +10,7 @@ #ifndef LLVM_DEBUGINFO_DWARFDEBUGRNGLISTS_H #define LLVM_DEBUGINFO_DWARFDEBUGRNGLISTS_H +#include "llvm/ADT/Optional.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" @@ -23,6 +24,7 @@ namespace llvm { class Error; class raw_ostream; +class DWARFUnit; /// A class representing a single range list entry. 
struct RangeListEntry : public DWARFListEntryBase { @@ -35,7 +37,9 @@ struct RangeListEntry : public DWARFListEntryBase { Error extract(DWARFDataExtractor Data, uint32_t End, uint32_t *OffsetPtr); void dump(raw_ostream &OS, uint8_t AddrSize, uint8_t MaxEncodingStringLength, - uint64_t &CurrentBase, DIDumpOptions DumpOpts) const; + uint64_t &CurrentBase, DIDumpOptions DumpOpts, + llvm::function_ref(uint32_t)> + LookupPooledAddress) const; bool isSentinel() const { return EntryKind == dwarf::DW_RLE_end_of_list; } }; @@ -44,7 +48,8 @@ class DWARFDebugRnglist : public DWARFListType { public: /// Build a DWARFAddressRangesVector from a rangelist. DWARFAddressRangesVector - getAbsoluteRanges(llvm::Optional BaseAddr) const; + getAbsoluteRanges(llvm::Optional BaseAddr, + DWARFUnit &U) const; }; class DWARFDebugRnglistTable : public DWARFListTableBase { diff --git a/include/llvm/DebugInfo/DWARF/DWARFDie.h b/include/llvm/DebugInfo/DWARF/DWARFDie.h index c77034f6348fe336ab3ff3491dc71cb7c3cab78f..baa47c2bfa580494c7bb1996bd1eca24dabbd662 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -404,6 +404,10 @@ public: Die = Die.getPreviousSibling(); } + llvm::DWARFDie::iterator base() const { + return llvm::DWARFDie::iterator(AtEnd ? 
Die : Die.getSibling()); + } + reverse_iterator &operator++() { assert(!AtEnd && "Incrementing rend"); llvm::DWARFDie D = Die.getPreviousSibling(); diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index 1b5f71c946f9fb7998eb0702f55a1a412c3ef010..edf9442acd054674da49357d24bde7466b9ffa0d 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -101,6 +101,7 @@ public: Optional getAsSignedConstant() const; Optional getAsCString() const; Optional getAsAddress() const; + Optional getAsSectionedAddress() const; Optional getAsSectionOffset() const; Optional> getAsBlock() const; Optional getAsCStringOffset() const; @@ -238,6 +239,13 @@ inline Optional toAddress(const Optional &V) { return None; } +inline Optional +toSectionedAddress(const Optional &V) { + if (V) + return V->getAsSectionedAddress(); + return None; +} + /// Take an optional DWARFFormValue and extract a address. /// /// \param V and optional DWARFFormValue to attempt to extract the value from. diff --git a/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h b/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h index 8d1ac5c83c234ed0886657f4fd680bcb711c7b2f..073e02903c39e1ebf72416d974a71439c26a227e 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h +++ b/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h @@ -24,6 +24,7 @@ class DWARFGdbIndex { uint32_t Version; uint32_t CuListOffset; + uint32_t TuListOffset; uint32_t AddressAreaOffset; uint32_t SymbolTableOffset; uint32_t ConstantPoolOffset; @@ -34,6 +35,13 @@ class DWARFGdbIndex { }; SmallVector CuList; + struct TypeUnitEntry { + uint64_t Offset; + uint64_t TypeOffset; + uint64_t TypeSignature; + }; + SmallVector TuList; + struct AddressEntry { uint64_t LowAddress; /// The low address. uint64_t HighAddress; /// The high address. 
@@ -55,6 +63,7 @@ class DWARFGdbIndex { uint32_t StringPoolOffset; void dumpCUList(raw_ostream &OS) const; + void dumpTUList(raw_ostream &OS) const; void dumpAddressArea(raw_ostream &OS) const; void dumpSymbolTable(raw_ostream &OS) const; void dumpConstantPool(raw_ostream &OS) const; diff --git a/include/llvm/DebugInfo/DWARF/DWARFListTable.h b/include/llvm/DebugInfo/DWARF/DWARFListTable.h index 8cf9e4008921437805f3e98900572b9fbf7342a3..9b987314f209495505382c2afac559a495295dcb 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFListTable.h +++ b/include/llvm/DebugInfo/DWARF/DWARFListTable.h @@ -99,6 +99,7 @@ public: uint32_t getHeaderOffset() const { return HeaderOffset; } uint8_t getAddrSize() const { return HeaderData.AddrSize; } uint32_t getLength() const { return HeaderData.Length; } + uint16_t getVersion() const { return HeaderData.Version; } StringRef getSectionName() const { return SectionName; } StringRef getListTypeString() const { return ListTypeString; } dwarf::DwarfFormat getFormat() const { return Format; } @@ -156,7 +157,10 @@ public: uint32_t getHeaderOffset() const { return Header.getHeaderOffset(); } uint8_t getAddrSize() const { return Header.getAddrSize(); } - void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) const; + void dump(raw_ostream &OS, + llvm::function_ref(uint32_t)> + LookupPooledAddress, + DIDumpOptions DumpOpts = {}) const; /// Return the contents of the offset entry designated by a given index. 
Optional getOffsetEntry(uint32_t Index) const { @@ -229,8 +233,11 @@ Error DWARFListType::extract(DWARFDataExtractor Data, } template -void DWARFListTableBase::dump(raw_ostream &OS, - DIDumpOptions DumpOpts) const { +void DWARFListTableBase::dump( + raw_ostream &OS, + llvm::function_ref(uint32_t)> + LookupPooledAddress, + DIDumpOptions DumpOpts) const { Header.dump(OS, DumpOpts); OS << HeaderString << "\n"; @@ -249,7 +256,7 @@ void DWARFListTableBase::dump(raw_ostream &OS, for (const auto &List : ListMap) for (const auto &Entry : List.second.getEntries()) Entry.dump(OS, getAddrSize(), MaxEncodingStringLength, CurrentBase, - DumpOpts); + DumpOpts, LookupPooledAddress); } template diff --git a/include/llvm/DebugInfo/DWARF/DWARFObject.h b/include/llvm/DebugInfo/DWARF/DWARFObject.h index 6e8f370f4aeafed9080b646e187d56590b4315b6..5a808b0ec6a97ba13486475b9b8ba602e5a4dffa 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFObject.h +++ b/include/llvm/DebugInfo/DWARF/DWARFObject.h @@ -33,11 +33,13 @@ public: virtual ArrayRef getSectionNames() const { return {}; } virtual bool isLittleEndian() const = 0; virtual uint8_t getAddressSize() const { llvm_unreachable("unimplemented"); } - virtual const DWARFSection &getInfoSection() const { return Dummy; } + virtual void + forEachInfoSections(function_ref F) const {} virtual void forEachTypesSections(function_ref F) const {} virtual StringRef getAbbrevSection() const { return ""; } virtual const DWARFSection &getLocSection() const { return Dummy; } + virtual const DWARFSection &getLoclistsSection() const { return Dummy; } virtual StringRef getARangeSection() const { return ""; } virtual StringRef getDebugFrameSection() const { return ""; } virtual StringRef getEHFrameSection() const { return ""; } @@ -52,7 +54,8 @@ public: virtual StringRef getGnuPubNamesSection() const { return ""; } virtual StringRef getGnuPubTypesSection() const { return ""; } virtual const DWARFSection &getStringOffsetSection() const { return Dummy; } - virtual 
const DWARFSection &getInfoDWOSection() const { return Dummy; } + virtual void + forEachInfoDWOSections(function_ref F) const {} virtual void forEachTypesDWOSections(function_ref F) const {} virtual StringRef getAbbrevDWOSection() const { return ""; } diff --git a/include/llvm/DebugInfo/DWARF/DWARFSection.h b/include/llvm/DebugInfo/DWARF/DWARFSection.h index 77045f0794ae826b7caa96fb17e42c6ac95455ff..7f82359652971ac2bf23afcfb16796f09c164386 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFSection.h +++ b/include/llvm/DebugInfo/DWARF/DWARFSection.h @@ -23,6 +23,11 @@ struct SectionName { bool IsNameUnique; }; +struct SectionedAddress { + uint64_t Address; + uint64_t SectionIndex; +}; + } // end namespace llvm #endif // LLVM_DEBUGINFO_DWARF_DWARFSECTION_H diff --git a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h index 0a5a1aaa79d11f473c2b63cdc641fd8067d371c2..8ca5ba13fc2334fb73eff7a293b29a9c67ae81c8 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h @@ -26,13 +26,13 @@ class raw_ostream; class DWARFTypeUnit : public DWARFUnit { public: DWARFTypeUnit(DWARFContext &Context, const DWARFSection &Section, - const DWARFUnitHeader &Header, - const DWARFDebugAbbrev *DA, const DWARFSection *RS, + const DWARFUnitHeader &Header, const DWARFDebugAbbrev *DA, + const DWARFSection *RS, const DWARFSection *LocSection, StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS, const DWARFSection &LS, bool LE, bool IsDWO, const DWARFUnitVector &UnitVector) - : DWARFUnit(Context, Section, Header, DA, RS, SS, SOS, AOS, LS, LE, IsDWO, - UnitVector) {} + : DWARFUnit(Context, Section, Header, DA, RS, LocSection, SS, SOS, AOS, + LS, LE, IsDWO, UnitVector) {} uint64_t getTypeHash() const { return getHeader().getTypeHash(); } uint32_t getTypeOffset() const { return getHeader().getTypeOffset(); } diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h 
b/include/llvm/DebugInfo/DWARF/DWARFUnit.h index 39d43b91485f1bf84c0fb393feae33d144ab5751..458278e4282fd777386ae0bc847d9976dcd51579 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -153,18 +153,13 @@ public: private: void addUnitsImpl(DWARFContext &Context, const DWARFObject &Obj, const DWARFSection &Section, const DWARFDebugAbbrev *DA, - const DWARFSection *RS, StringRef SS, - const DWARFSection &SOS, const DWARFSection *AOS, - const DWARFSection &LS, bool LE, bool IsDWO, bool Lazy, - DWARFSectionKind SectionKind); + const DWARFSection *RS, const DWARFSection *LocSection, + StringRef SS, const DWARFSection &SOS, + const DWARFSection *AOS, const DWARFSection &LS, bool LE, + bool IsDWO, bool Lazy, DWARFSectionKind SectionKind); }; /// Represents base address of the CU. -struct BaseAddress { - uint64_t Address; - uint64_t SectionIndex; -}; - /// Represents a unit's contribution to the string offsets table. struct StrOffsetsContributionDescriptor { uint64_t Base = 0; @@ -198,13 +193,19 @@ class DWARFUnit { const DWARFDebugAbbrev *Abbrev; const DWARFSection *RangeSection; uint32_t RangeSectionBase; + /// We either keep track of the location list section or its data, depending + /// on whether we are handling a split DWARF section or not. + union { + const DWARFSection *LocSection; + StringRef LocSectionData; + }; const DWARFSection &LineSection; StringRef StringSection; const DWARFSection &StringOffsetSection; const DWARFSection *AddrOffsetSection; uint32_t AddrOffsetSectionBase = 0; bool isLittleEndian; - bool isDWO; + bool IsDWO; const DWARFUnitVector &UnitVector; /// Start, length, and DWARF format of the unit's contribution to the string @@ -215,7 +216,7 @@ class DWARFUnit { Optional RngListTable; mutable const DWARFAbbreviationDeclarationSet *Abbrevs; - llvm::Optional BaseAddr; + llvm::Optional BaseAddr; /// The compile unit debug information entry items. 
std::vector DieArray; @@ -245,29 +246,30 @@ protected: /// length and form. The given offset is expected to be derived from the unit /// DIE's DW_AT_str_offsets_base attribute. Optional - determineStringOffsetsTableContribution(DWARFDataExtractor &DA, - uint64_t Offset); + determineStringOffsetsTableContribution(DWARFDataExtractor &DA); /// Find the unit's contribution to the string offsets table and determine its /// length and form. The given offset is expected to be 0 in a dwo file or, /// in a dwp file, the start of the unit's contribution to the string offsets /// table section (as determined by the index table). Optional - determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA, - uint64_t Offset); + determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA); public: DWARFUnit(DWARFContext &Context, const DWARFSection &Section, - const DWARFUnitHeader &Header, - const DWARFDebugAbbrev *DA, const DWARFSection *RS, StringRef SS, - const DWARFSection &SOS, const DWARFSection *AOS, + const DWARFUnitHeader &Header, const DWARFDebugAbbrev *DA, + const DWARFSection *RS, const DWARFSection *LocSection, + StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS, const DWARFSection &LS, bool LE, bool IsDWO, const DWARFUnitVector &UnitVector); virtual ~DWARFUnit(); + bool isDWOUnit() const { return IsDWO; } DWARFContext& getContext() const { return Context; } const DWARFSection &getInfoSection() const { return InfoSection; } + const DWARFSection *getLocSection() const { return LocSection; } + StringRef getLocSectionData() const { return LocSectionData; } uint32_t getOffset() const { return Header.getOffset(); } const dwarf::FormParams &getFormParams() const { return Header.getFormParams(); @@ -301,8 +303,8 @@ public: RangeSectionBase = Base; } - bool getAddrOffsetSectionItem(uint32_t Index, uint64_t &Result) const; - bool getStringOffsetSectionItem(uint32_t Index, uint64_t &Result) const; + Optional getAddrOffsetSectionItem(uint32_t Index) const; 
+ Optional getStringOffsetSectionItem(uint32_t Index) const; DWARFDataExtractor getDebugInfoExtractor() const; @@ -372,7 +374,7 @@ public: llvm_unreachable("Invalid UnitType."); } - llvm::Optional getBaseAddress(); + llvm::Optional getBaseAddress(); DWARFDie getUnitDIE(bool ExtractUnitDIEOnly = true) { extractDIEsIfNeeded(ExtractUnitDIEOnly); diff --git a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h index 3ad65cf51b1b7bf869f17ad9666128d3626a9322..e47fbea5646ed523cd55555f9f09bc117ffff623 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h +++ b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -97,6 +97,9 @@ private: /// lies between to valid DIEs. std::map> ReferenceToDIEOffsets; uint32_t NumDebugLineErrors = 0; + // Used to relax some checks that do not currently work portably + bool IsObjectFile; + bool IsMachOObject; raw_ostream &error() const; raw_ostream &warn() const; @@ -286,8 +289,8 @@ private: public: DWARFVerifier(raw_ostream &S, DWARFContext &D, - DIDumpOptions DumpOpts = DIDumpOptions::getForSingleDIE()) - : OS(S), DCtx(D), DumpOpts(std::move(DumpOpts)) {} + DIDumpOptions DumpOpts = DIDumpOptions::getForSingleDIE()); + /// Verify the information in any of the following sections, if available: /// .debug_abbrev, debug_abbrev.dwo /// diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h new file mode 100644 index 0000000000000000000000000000000000000000..f3b02f07e6485272c45e378133db5bb71764078f --- /dev/null +++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h @@ -0,0 +1,36 @@ +//==- DIAEnumFrameData.h --------------------------------------- -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_DIA_DIAENUMFRAMEDATA_H +#define LLVM_DEBUGINFO_PDB_DIA_DIAENUMFRAMEDATA_H + +#include "DIASupport.h" +#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/IPDBFrameData.h" + +namespace llvm { +namespace pdb { + +class DIAEnumFrameData : public IPDBEnumChildren { +public: + explicit DIAEnumFrameData(CComPtr DiaEnumerator); + + uint32_t getChildCount() const override; + ChildTypePtr getChildAtIndex(uint32_t Index) const override; + ChildTypePtr getNext() override; + void reset() override; + +private: + CComPtr Enumerator; +}; + +} // namespace pdb +} // namespace llvm + +#endif diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h b/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h new file mode 100644 index 0000000000000000000000000000000000000000..0ce6cfc93030d043da7dc9e48698db7f3251c722 --- /dev/null +++ b/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h @@ -0,0 +1,39 @@ +//===- DIAFrameData.h - DIA Impl. of IPDBFrameData ---------------- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_DIA_DIAFRAMEDATA_H +#define LLVM_DEBUGINFO_PDB_DIA_DIAFRAMEDATA_H + +#include "DIASupport.h" +#include "llvm/DebugInfo/PDB/IPDBFrameData.h" + +namespace llvm { +namespace pdb { + +class DIASession; + +class DIAFrameData : public IPDBFrameData { +public: + explicit DIAFrameData(CComPtr DiaFrameData); + + uint32_t getAddressOffset() const override; + uint32_t getAddressSection() const override; + uint32_t getLengthBlock() const override; + std::string getProgram() const override; + uint32_t getRelativeVirtualAddress() const override; + uint64_t getVirtualAddress() const override; + +private: + CComPtr FrameData; +}; + +} // namespace pdb +} // namespace llvm + +#endif diff --git a/include/llvm/DebugInfo/PDB/DIA/DIASession.h b/include/llvm/DebugInfo/PDB/DIA/DIASession.h index e355605c2960c26a095b32548adc0da589d7e7e9..592e061a8d83f13755b13a6eb097e090e71fcbaa 100644 --- a/include/llvm/DebugInfo/PDB/DIA/DIASession.h +++ b/include/llvm/DebugInfo/PDB/DIA/DIASession.h @@ -85,6 +85,7 @@ public: std::unique_ptr getSectionContribs() const override; + std::unique_ptr getFrameData() const override; private: CComPtr Session; }; diff --git a/include/llvm/DebugInfo/PDB/GenericError.h b/include/llvm/DebugInfo/PDB/GenericError.h index 4e2e8b163b533e52b575f520545239183ef3d0a0..7b5a85295963b78760b81d3dd8bd36d51da647fa 100644 --- a/include/llvm/DebugInfo/PDB/GenericError.h +++ b/include/llvm/DebugInfo/PDB/GenericError.h @@ -21,24 +21,23 @@ enum class pdb_error_code { dia_sdk_not_present, dia_failed_loading, signature_out_of_date, - type_server_not_found, unspecified, }; -} // namespace codeview +} // namespace pdb } // namespace llvm namespace std { - template <> - struct is_error_code_enum : std::true_type {}; +template <> +struct is_error_code_enum : std::true_type {}; } // namespace std namespace llvm { namespace pdb { - const std::error_category &PDBErrCategory(); 
+const std::error_category &PDBErrCategory(); - inline std::error_code make_error_code(pdb_error_code E) { - return std::error_code(static_cast(E), PDBErrCategory()); - } +inline std::error_code make_error_code(pdb_error_code E) { + return std::error_code(static_cast(E), PDBErrCategory()); +} /// Base class for errors originating when parsing raw PDB files class PDBError : public ErrorInfo { diff --git a/include/llvm/DebugInfo/PDB/IPDBFrameData.h b/include/llvm/DebugInfo/PDB/IPDBFrameData.h new file mode 100644 index 0000000000000000000000000000000000000000..74679215b88017286aadd7dafc98060ddbae8cd9 --- /dev/null +++ b/include/llvm/DebugInfo/PDB/IPDBFrameData.h @@ -0,0 +1,36 @@ +//===- IPDBFrameData.h - base interface for frame data ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_IPDBFRAMEDATA_H +#define LLVM_DEBUGINFO_PDB_IPDBFRAMEDATA_H + +#include +#include + +namespace llvm { +namespace pdb { + +/// IPDBFrameData defines an interface used to represent a frame data of some +/// code block. 
+class IPDBFrameData { +public: + virtual ~IPDBFrameData(); + + virtual uint32_t getAddressOffset() const = 0; + virtual uint32_t getAddressSection() const = 0; + virtual uint32_t getLengthBlock() const = 0; + virtual std::string getProgram() const = 0; + virtual uint32_t getRelativeVirtualAddress() const = 0; + virtual uint64_t getVirtualAddress() const = 0; +}; + +} // namespace pdb +} // namespace llvm + +#endif diff --git a/include/llvm/DebugInfo/PDB/IPDBSession.h b/include/llvm/DebugInfo/PDB/IPDBSession.h index 24573cdb7797883f821f52b6af591106e3d93c29..88fd02c0a345287a8c82a213416a2dd627c1c908 100644 --- a/include/llvm/DebugInfo/PDB/IPDBSession.h +++ b/include/llvm/DebugInfo/PDB/IPDBSession.h @@ -91,6 +91,9 @@ public: virtual std::unique_ptr getSectionContribs() const = 0; + + virtual std::unique_ptr + getFrameData() const = 0; }; } // namespace pdb } // namespace llvm diff --git a/include/llvm/DebugInfo/PDB/Native/NativeSession.h b/include/llvm/DebugInfo/PDB/Native/NativeSession.h index 07ce85ef820da92e56e417f12ef228bf5671dfcb..4878e47d31217f7d4871365d3ae172331e2df960 100644 --- a/include/llvm/DebugInfo/PDB/Native/NativeSession.h +++ b/include/llvm/DebugInfo/PDB/Native/NativeSession.h @@ -93,6 +93,8 @@ public: std::unique_ptr getSectionContribs() const override; + std::unique_ptr getFrameData() const override; + PDBFile &getPDBFile() { return *Pdb; } const PDBFile &getPDBFile() const { return *Pdb; } diff --git a/include/llvm/DebugInfo/PDB/Native/TpiStream.h b/include/llvm/DebugInfo/PDB/Native/TpiStream.h index 00cc720336cf1337d8c9979e4175660286dc93a4..b76576a7a263bb4675a9dfab39cd348549a46450 100644 --- a/include/llvm/DebugInfo/PDB/Native/TpiStream.h +++ b/include/llvm/DebugInfo/PDB/Native/TpiStream.h @@ -61,6 +61,10 @@ public: Expected findFullDeclForForwardRef(codeview::TypeIndex ForwardRefTI) const; + std::vector findRecordsByName(StringRef Name) const; + + codeview::CVType getType(codeview::TypeIndex Index); + BinarySubstreamRef getTypeRecordsSubstream() 
const; Error commit(); diff --git a/include/llvm/DebugInfo/PDB/PDBTypes.h b/include/llvm/DebugInfo/PDB/PDBTypes.h index 6247018ce0f12ee4f5b359369bcea82fedcc12c1..917f3ed73910af8f87682ebc702afbe92135c5bc 100644 --- a/include/llvm/DebugInfo/PDB/PDBTypes.h +++ b/include/llvm/DebugInfo/PDB/PDBTypes.h @@ -12,6 +12,7 @@ #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/IPDBFrameData.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include #include @@ -71,6 +72,7 @@ using IPDBEnumLineNumbers = IPDBEnumChildren; using IPDBEnumTables = IPDBEnumChildren; using IPDBEnumInjectedSources = IPDBEnumChildren; using IPDBEnumSectionContribs = IPDBEnumChildren; +using IPDBEnumFrameData = IPDBEnumChildren; /// Specifies which PDB reader implementation is to be used. Only a value /// of PDB_ReaderType::DIA is currently supported, but Native is in the works. diff --git a/include/llvm/Demangle/ItaniumDemangle.h b/include/llvm/Demangle/ItaniumDemangle.h index bc60bc3454e30f81525b14cbbd65e60d3279e508..c5619a15bbee9370355c1c9a48748d0ba72b28a3 100644 --- a/include/llvm/Demangle/ItaniumDemangle.h +++ b/include/llvm/Demangle/ItaniumDemangle.h @@ -2134,8 +2134,7 @@ public: } }; -template -struct Db { +template struct AbstractManglingParser { const char *First; const char *Last; @@ -2167,7 +2166,10 @@ struct Db { Alloc ASTAllocator; - Db(const char *First_, const char *Last_) : First(First_), Last(Last_) {} + AbstractManglingParser(const char *First_, const char *Last_) + : First(First_), Last(Last_) {} + + Derived &getDerived() { return static_cast(*this); } void reset(const char *First_, const char *Last_) { First = First_; @@ -2274,7 +2276,7 @@ struct Db { FunctionRefQual ReferenceQualifier = FrefQualNone; size_t ForwardTemplateRefsBegin; - NameState(Db *Enclosing) + NameState(AbstractManglingParser *Enclosing) : ForwardTemplateRefsBegin(Enclosing->ForwardTemplateRefs.size()) {} }; @@ -2324,35 +2326,36 @@ const 
char* parse_discriminator(const char* first, const char* last); // // ::= // ::= -template Node *Db::parseName(NameState *State) { +template +Node *AbstractManglingParser::parseName(NameState *State) { consumeIf('L'); // extension if (look() == 'N') - return parseNestedName(State); + return getDerived().parseNestedName(State); if (look() == 'Z') - return parseLocalName(State); + return getDerived().parseLocalName(State); // ::= if (look() == 'S' && look(1) != 't') { - Node *S = parseSubstitution(); + Node *S = getDerived().parseSubstitution(); if (S == nullptr) return nullptr; if (look() != 'I') return nullptr; - Node *TA = parseTemplateArgs(State != nullptr); + Node *TA = getDerived().parseTemplateArgs(State != nullptr); if (TA == nullptr) return nullptr; if (State) State->EndsWithTemplateArgs = true; return make(S, TA); } - Node *N = parseUnscopedName(State); + Node *N = getDerived().parseUnscopedName(State); if (N == nullptr) return nullptr; // ::= if (look() == 'I') { Subs.push_back(N); - Node *TA = parseTemplateArgs(State != nullptr); + Node *TA = getDerived().parseTemplateArgs(State != nullptr); if (TA == nullptr) return nullptr; if (State) State->EndsWithTemplateArgs = true; @@ -2365,10 +2368,11 @@ template Node *Db::parseName(NameState *State) { // := Z E [] // := Z E s [] // := Z Ed [ ] _ -template Node *Db::parseLocalName(NameState *State) { +template +Node *AbstractManglingParser::parseLocalName(NameState *State) { if (!consumeIf('Z')) return nullptr; - Node *Encoding = parseEncoding(); + Node *Encoding = getDerived().parseEncoding(); if (Encoding == nullptr || !consumeIf('E')) return nullptr; @@ -2384,13 +2388,13 @@ template Node *Db::parseLocalName(NameState *State) { parseNumber(true); if (!consumeIf('_')) return nullptr; - Node *N = parseName(State); + Node *N = getDerived().parseName(State); if (N == nullptr) return nullptr; return make(Encoding, N); } - Node *Entity = parseName(State); + Node *Entity = getDerived().parseName(State); if (Entity == 
nullptr) return nullptr; First = parse_discriminator(First, Last); @@ -2400,14 +2404,16 @@ template Node *Db::parseLocalName(NameState *State) { // ::= // ::= St # ::std:: // extension ::= StL -template Node *Db::parseUnscopedName(NameState *State) { - if (consumeIf("StL") || consumeIf("St")) { - Node *R = parseUnqualifiedName(State); - if (R == nullptr) - return nullptr; - return make(R); - } - return parseUnqualifiedName(State); +template +Node * +AbstractManglingParser::parseUnscopedName(NameState *State) { + if (consumeIf("StL") || consumeIf("St")) { + Node *R = getDerived().parseUnqualifiedName(State); + if (R == nullptr) + return nullptr; + return make(R); + } + return getDerived().parseUnqualifiedName(State); } // ::= [abi-tags] @@ -2415,27 +2421,28 @@ template Node *Db::parseUnscopedName(NameState *State) { // ::= // ::= // ::= DC + E # structured binding declaration -template -Node *Db::parseUnqualifiedName(NameState *State) { +template +Node * +AbstractManglingParser::parseUnqualifiedName(NameState *State) { // s are special-cased in parseNestedName(). 
Node *Result; if (look() == 'U') - Result = parseUnnamedTypeName(State); + Result = getDerived().parseUnnamedTypeName(State); else if (look() >= '1' && look() <= '9') - Result = parseSourceName(State); + Result = getDerived().parseSourceName(State); else if (consumeIf("DC")) { size_t BindingsBegin = Names.size(); do { - Node *Binding = parseSourceName(State); + Node *Binding = getDerived().parseSourceName(State); if (Binding == nullptr) return nullptr; Names.push_back(Binding); } while (!consumeIf('E')); Result = make(popTrailingNodeArray(BindingsBegin)); } else - Result = parseOperatorName(State); + Result = getDerived().parseOperatorName(State); if (Result != nullptr) - Result = parseAbiTags(Result); + Result = getDerived().parseAbiTags(Result); return Result; } @@ -2445,7 +2452,9 @@ Node *Db::parseUnqualifiedName(NameState *State) { // ::= Ul E [ ] _ // // ::= + # Parameter types or "v" if the lambda has no parameters -template Node *Db::parseUnnamedTypeName(NameState *) { +template +Node * +AbstractManglingParser::parseUnnamedTypeName(NameState *) { if (consumeIf("Ut")) { StringView Count = parseNumber(); if (!consumeIf('_')) @@ -2458,7 +2467,7 @@ template Node *Db::parseUnnamedTypeName(NameState *) { if (!consumeIf("vE")) { size_t ParamsBegin = Names.size(); do { - Node *P = parseType(); + Node *P = getDerived().parseType(); if (P == nullptr) return nullptr; Names.push_back(P); @@ -2474,7 +2483,8 @@ template Node *Db::parseUnnamedTypeName(NameState *) { } // ::= -template Node *Db::parseSourceName(NameState *) { +template +Node *AbstractManglingParser::parseSourceName(NameState *) { size_t Length = 0; if (parsePositiveInteger(&Length)) return nullptr; @@ -2538,7 +2548,9 @@ template Node *Db::parseSourceName(NameState *) { // ::= rS # >>= // ::= ss # <=> C++2a // ::= v # vendor extended operator -template Node *Db::parseOperatorName(NameState *State) { +template +Node * +AbstractManglingParser::parseOperatorName(NameState *State) { switch (look()) { case 'a': 
switch (look(1)) { @@ -2578,7 +2590,7 @@ template Node *Db::parseOperatorName(NameState *State) { SwapAndRestore SavePermit(PermitForwardTemplateReferences, PermitForwardTemplateReferences || State != nullptr); - Node* Ty = parseType(); + Node *Ty = getDerived().parseType(); if (Ty == nullptr) return nullptr; if (State) State->CtorDtorConversion = true; @@ -2642,7 +2654,7 @@ template Node *Db::parseOperatorName(NameState *State) { // ::= li # operator "" case 'i': { First += 2; - Node *SN = parseSourceName(State); + Node *SN = getDerived().parseSourceName(State); if (SN == nullptr) return nullptr; return make(SN); @@ -2763,7 +2775,7 @@ template Node *Db::parseOperatorName(NameState *State) { case 'v': if (std::isdigit(look(1))) { First += 2; - Node *SN = parseSourceName(State); + Node *SN = getDerived().parseSourceName(State); if (SN == nullptr) return nullptr; return make(SN); @@ -2781,8 +2793,10 @@ template Node *Db::parseOperatorName(NameState *State) { // ::= D1 # complete object destructor // ::= D2 # base object destructor // extension ::= D5 # ? -template -Node *Db::parseCtorDtorName(Node *&SoFar, NameState *State) { +template +Node * +AbstractManglingParser::parseCtorDtorName(Node *&SoFar, + NameState *State) { if (SoFar->getKind() == Node::KSpecialSubstitution) { auto SSK = static_cast(SoFar)->SSK; switch (SSK) { @@ -2793,6 +2807,7 @@ Node *Db::parseCtorDtorName(Node *&SoFar, NameState *State) { SoFar = make(SSK); if (!SoFar) return nullptr; + break; default: break; } @@ -2806,7 +2821,7 @@ Node *Db::parseCtorDtorName(Node *&SoFar, NameState *State) { ++First; if (State) State->CtorDtorConversion = true; if (IsInherited) { - if (parseName(State) == nullptr) + if (getDerived().parseName(State) == nullptr) return nullptr; } return make(SoFar, false, Variant); @@ -2840,7 +2855,9 @@ Node *Db::parseCtorDtorName(Node *&SoFar, NameState *State) { // ::=